In [1]:
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.preprocessing import RobustScaler
from sklearn.metrics import r2_score
from sklearn.metrics import mean_squared_error
import matplotlib.pyplot as plt

In [2]:
def model(A, THETA):
    """
    Multivariate Linear Regression Model, Yh = A*THETA
    The matrix A is sometimes called the design matrix.
    """
    return A.dot(THETA)

In [3]:
# Design Matrix
def designMatrix(Tau,X):
    q,_ = X.shape
    for p in range(q):
        M = powerVector(Tau,X[p,:])
        if p == 0:
            A = M
            continue
        A = np.vstack((A,M))
    return A

# Power Vector
def powerVector(Tau,V):
    if V.size == 0 or Tau == 0:
        return 1
    else:
        M = np.array([])
        Z = V[:-1]
        W = V[-1]
        for k in range(Tau+1):
            value = powerVector(Tau-k,Z)*(W**k)
            M = np.append(M,value)
        return M

In [4]:
# Polynomial parameter number
def polyParamsNumber(n,tau):
    s = 0
    for l in range(tau+1):
        val = np.math.factorial(l+n-1)/np.math.factorial(n-1)
        val = val/np.math.factorial(l)
        s = s + val
    return int(s)

In [5]:
def loss(Y_true, Y_pred, THETA, lambda_param):
    """ Mean Squared Error """
    E = Y_true - Y_pred
    SSE = np.square(np.linalg.norm(E, 'fro'))
    Reg = (lambda_param/(2*E.shape[0]))*np.square(np.linalg.norm(THETA[1:,:], 'fro'))
    MSE = SSE/(2*E.shape[0]) + Reg
    return MSE

In [6]:
def gradient(A,E,THETA,lambda_param):
    """ MSE Gradient """
    SSEGrad = -1.0*(A.T).dot(E)
    MSEGrad = SSEGrad/E.shape[0]
    MSEGrad[1:,:] = MSEGrad[1:,:] + (lambda_param/E.shape[0])*THETA[1:,:]
    return MSEGrad

In [7]:
# Basic RMSprop
def rmsprop_optimization(
    X,
    Y,
    tau=1,
    lambda_param=0.0,
    maxEpochs=100,
    show=10,
    batch_size=16,
    learning_rate=0.01,
    stopping_threshold=1e-6,
    beta=0.9,
    epsilon=1e-8,
):
    """
    RMSprop optimization with support for mini-batches.
    """
    # Initialize the model parameters randomly
    n = X.shape[1]
    m = Y.shape[1]
    rho = polyParamsNumber(n,tau)
    THETA = np.random.randn(rho,m)
    # Initialize accumulators for squared gradients
    this = "RMSprop"
    S = np.zeros((rho,m)) # Momentum
    q = X.shape[0]
    n_minibatches = q // batch_size
    previous_loss = np.inf

    for epoch in range(maxEpochs+1):
        # Shuffle the data
        indices = np.random.permutation(q)

        X = X[indices,:]
        Y = Y[indices,:]

        for j in range(n_minibatches):
            X_Batch = X[j*batch_size : (j+1)*batch_size,:]            
            Y_Batch = Y[j*batch_size : (j+1)*batch_size,:]
            # Compute the gradients
            A_Batch = designMatrix(tau,X_Batch)
            Y_pred = model(A_Batch,THETA)
            E_Batch = Y_Batch-Y_pred
            Grad = gradient(A_Batch,E_Batch,THETA,lambda_param)

            # Update accumulators
            S = beta * S + (1 - beta) * (Grad**2)

            # Update parameters
            THETA -= learning_rate * Grad / (np.sqrt(S) + epsilon)
            
        if q % batch_size != 0:
            X_Batch = X[(j+1)*batch_size : q,:]            
            Y_Batch = Y[(j+1)*batch_size : q,:]
            # Compute the gradients
            A_Batch = designMatrix(tau,X_Batch)
            Y_pred = model(A_Batch,THETA)
            E_Batch = Y_Batch-Y_pred
            Grad = gradient(A_Batch,E_Batch,THETA,lambda_param)

            # Update accumulators
            S = beta * S + (1 - beta) * (Grad**2)

            # Update parameters
            THETA -= learning_rate * Grad / (np.sqrt(S) + epsilon)

        # Compute the loss
        A = designMatrix(tau,X)
        Yh = model(A,THETA)
        current_loss = loss(Y, Yh, THETA, lambda_param)

        # Progress
        if (np.fmod(epoch,show) == 0):
            print(this,end = ": ")
            if np.isfinite(maxEpochs):
                print("Epoch ",epoch, "/", maxEpochs,end = " ")
            if np.isfinite(stopping_threshold):
                print(", Performance %8.3e" % current_loss, "/", stopping_threshold, end = " ")
                
            print("\n")

        if abs(previous_loss - current_loss) < stopping_threshold:
            break

        previous_loss = current_loss

    return THETA

In [None]:

# Load dataset
data = np.loadtxt('challenge01_syntheticdataset22.txt',delimiter=',')
inputs  = data[:,0:2]
targets = data[:,2:4]

In [None]:
#los datos ya estan normzalizados 
# Data normalization inputs and targets
# Initialize the RobustScaler
scalerInputs  = RobustScaler()
scalerTargets = RobustScaler()
# Transform the data
robust_scaled_Inputs  = scalerInputs.fit_transform(inputs)
robust_scaled_Targets = scalerTargets.fit_transform(targets)

In [10]:
# Train and Test Split Data
inputs_train, inputs_test, targets_train, targets_test = train_test_split(robust_scaled_Inputs, robust_scaled_Targets, random_state = 1, test_size = 0.4)

In [11]:
# Train and Test Data
# Train Data
xTrain = inputs_train
tTrain = targets_train
# Test Data
xTest = inputs_test
tTest = targets_test

In [12]:
# Find the optimal parameters m and b with RMSprop
tau = 1
lambda_param = 0.0

THETA = rmsprop_optimization(
    xTrain,
    tTrain,
    tau,
    lambda_param,
    maxEpochs=1000,
    show=10,
    batch_size=8,
    learning_rate=0.01,
    stopping_threshold=1e-6,
    beta=0.9,
    epsilon=1e-8,
)

RMSprop: Epoch  0 / 1000 , Performance 1.562e+00 / 1e-06 

RMSprop: Epoch  10 / 1000 , Performance 4.658e-01 / 1e-06 

RMSprop: Epoch  20 / 1000 , Performance 9.994e-02 / 1e-06 

RMSprop: Epoch  30 / 1000 , Performance 3.495e-03 / 1e-06 

RMSprop: Epoch  40 / 1000 , Performance 2.644e-04 / 1e-06 

RMSprop: Epoch  50 / 1000 , Performance 2.460e-04 / 1e-06 

RMSprop: Epoch  60 / 1000 , Performance 4.498e-05 / 1e-06 

RMSprop: Epoch  70 / 1000 , Performance 3.911e-05 / 1e-06 

RMSprop: Epoch  80 / 1000 , Performance 3.666e-05 / 1e-06 

RMSprop: Epoch  90 / 1000 , Performance 3.102e-05 / 1e-06 

RMSprop: Epoch  100 / 1000 , Performance 2.641e-05 / 1e-06 

RMSprop: Epoch  110 / 1000 , Performance 2.157e-06 / 1e-06 



In [13]:
# Make predictions
# Train data
A_train = designMatrix(tau,xTrain)
outputTrain = model(A_train,THETA)
# Test data
A_test = designMatrix(tau,xTest)
outputTest = model(A_test,THETA)

In [14]:
# Inverse_transform in RobustScaler
# Inverse transformation of training data for the outputs
yTrain = scalerTargets.inverse_transform(tTrain)
yhTrain = scalerTargets.inverse_transform(outputTrain)
# Inverse transformation of the test data for the outputs
yTest  = scalerTargets.inverse_transform(tTest)
yhTest = scalerTargets.inverse_transform(outputTest)

In [15]:
# R2 for raw train data
R2_train = r2_score(yTrain.reshape(-1, 1),yhTrain.reshape(-1, 1))
print(R2_train)

0.9999976736395545


In [16]:
# MSE for raw train data
MSE_train = mean_squared_error(yTrain.reshape(-1, 1),yhTrain.reshape(-1, 1))
print(MSE_train)

0.000586168231344461


In [17]:
# R2 for raw test data
R2_test = r2_score(yTest.reshape(-1, 1),yhTest.reshape(-1, 1))
print(R2_test)

0.9999975029423862


In [18]:
# MSE for raw test data
MSE_test = mean_squared_error(yTest.reshape(-1, 1),yhTest.reshape(-1, 1))
print(MSE_test)

0.0006147069761916673


In [19]:
THETA

array([[-8.75482735e-04, -2.11584914e-04],
       [-9.71276446e-01,  7.84296760e-01],
       [ 6.80553144e-01, -9.82431657e-01]])