In [1]:
from __future__ import division

import csv
import gc
import time

from numpy import *
from matplotlib.pyplot import *

# Explicit alias: the code below uses `np.` and must not depend on the
# pyplot star-import happening to leak it into the namespace.
import numpy as np
import pandas as pd
import scipy.linalg

## Functions for data and Time Series generation

In [2]:
def loadData(file='data.csv'):
    """
    Read a comma-separated data file from the ``Resources`` folder.

    The values are returned exactly as ``np.genfromtxt`` parses them; no
    normalization is applied here.
    :param file: name of the file inside ``Resources/``, by default 'data.csv'
    :return: array with the parsed contents of the file
    """
    path = 'Resources/' + file
    return np.genfromtxt(path, delimiter=',')

In [3]:
def normalizeData(data):
    """
    Standardize data to zero mean and unit standard deviation.

    Mean and standard deviation are computed over all elements of ``data``
    (``np.mean`` / ``np.std`` without an axis).
    :param data: data values
    :return: normalized data, mean, std. For constant data (std == 0) the
             centered values (all zeros) are returned instead of dividing by
             zero; the returned std is still 0, so ``data*std + mean``
             restores the original constant.
    """
    mean = np.mean(data)
    std = np.std(data)
    centered = data - mean
    if std == 0:
        # Constant series: dividing by std would fill the result with NaN/inf.
        return centered,mean,std
    norm = centered/std
    return norm,mean,std

In [4]:
def denormalize(data,mean,std):
    """
    Undo a mean/standard-deviation normalization.
    :param data: normalized data to be taken back to the original scale
    :param mean: mean of the original unnormalized data
    :param std: standard deviation of the original unnormalized data
    :return: data*std + mean
    """
    rescaled = data*std
    return rescaled + mean

In [85]:
def getTimeSerieFromFile(file='data.csv',start_date="2001-01-01",freq = 'D',normalize=True):
    """
    Load the values of a file and wrap them in a date-indexed Time Serie.
    :param file: data file in the Resources folder, by default 'data.csv'
    :param start_date: first date of the Time Serie, a string in format 'YYYY-MM-DD', by default '2001-01-01'
    :param freq: frequency of the Time Serie, by default daily, see http://pandas.pydata.org/pandas-docs/stable/timeseries.html
    :param normalize: normalize the data if True, by default True
    :return: Time Serie, mean, std (mean and std are both 0 when normalize is False)
    """
    values = loadData(file)
    dates = pd.date_range(start_date,periods=len(values),freq=freq)
    if normalize:
        values,mean,std = normalizeData(values)
    else:
        mean,std = 0,0
    return pd.Series(values,index = dates),mean,std

In [91]:
def getTimeSerieFromData(data,start_date="2001-01-01",freq = 'D',normalize=True):
    """
    Wrap already loaded values in a date-indexed Time Serie.
    :param data: values to be indexed (one per period)
    :param start_date: first date of the Time Serie, a string in format 'YYYY-MM-DD', by default '2001-01-01'
    :param freq: frequency of the Time Serie, by default daily, see http://pandas.pydata.org/pandas-docs/stable/timeseries.html
    :param normalize: normalize the data if True, by default True
    :return: Time Serie, mean, std (mean and std are both 0 when normalize is False)
    """
    dates = pd.date_range(start_date,periods=len(data),freq=freq)
    if not normalize:
        # raw values: no statistics are computed, placeholders are returned
        return pd.Series(data,index = dates),0,0
    scaled,mean,std = normalizeData(data)
    return pd.Series(scaled,index = dates),mean,std

### Function to Define Validation Train and Test TS

In [6]:
def getValidationTrainAndTestTS(TS,start=0,end=6,lenght=6):
    """
    Split a Time Serie into a training part and a held-out test part.

    The last ``end`` observations are held out of training. The test part
    starts at the first held-out observation and holds ``lenght`` values, or
    everything up to the end of TS when ``lenght`` is not smaller than ``end``.
    :param TS: Time Serie (any sliceable object whose slices have ``copy``)
    :param start: index from TS to start Training TS, by default = 0
    :param end: number of observations at the end of TS kept out of Training TS,
                the Test TS starts there, by default = 6
    :param lenght: lenght of Test TS, by default = 6, if it is equal to (or
                   greater than) end the Test TS runs to the end of TS
    :return: Training TS, Test TS (both are copies)
    """
    total = len(TS)
    # Use explicit positions instead of negative indices: with negative
    # slicing end=0 becomes TS[start:0] (empty training) and TS[0:] (everything
    # as test), the opposite of "hold out nothing".
    split = max(total - end, 0)
    trainTS = TS[start:split].copy()
    if lenght < end:
        # window of `lenght` values right after the training part
        stop = max(total - end + lenght, 0)
        testTS = TS[split:stop].copy()
    else:
        testTS = TS[split:].copy()
    # Errors (e.g. a TS that cannot be sliced) propagate to the caller; the
    # previous handler hid them and returned None.
    return trainTS,testTS

### Function to Define Test Train TS

In [7]:
def getTestTrainTS(TS,start=0,end=None):
    """
    Copy the slice of TS used as Test Training TS, by default the whole TS.
    :param TS: Time Serie
    :param start: index from original TS to start Test Training TS, by default = 0
    :param end: index from original TS to end Test Training TS, by default = None (up to the end of TS)
    :return: Test Training TS (a copy)
    """
    # a None stop bound makes the slice run to the end of TS
    window = TS[start:end]
    return window.copy()

### Function to initialize Win matrix

In [8]:
def generateWinMatrix(inSize, resSize, scaleFactor=1):
    """
    Generate the W in matrix: uniform values drawn from [-0.5, 0.5) and then
    multiplied by scaleFactor.

    The matrix has one column more than inSize; it matches the constant 1
    that is stacked on top of the input when the matrix is applied.
    :param inSize: number of inputs without the interceptor
    :param resSize: size of reservoir
    :param scaleFactor: scale factor for the matrix, by default 1
    :return: W in matrix of shape (resSize, 1 + inSize)
    """
    shape = (resSize, 1 + inSize)
    weights = random.uniform(low=-0.5, high=0.5, size=shape)
    return weights * scaleFactor

### Function to initialize W matrix

In [9]:
def getSpectralRadius(W):
    """
    Get the spectral radius of the W matrix, i.e. the largest absolute value
    among its eigenvalues.
    :param W: square W matrix
    :return: spectral radius
    """
    # Only the eigenvalues are needed, so skip the eigenvector computation
    # that linalg.eig would also perform.
    eigenvalues = np.linalg.eigvals(W)
    rhoW = np.max(np.abs(eigenvalues))
    return rhoW

In [10]:
def generateW(resSize, SRS = 1):
    """
    Generate the W matrix: uniform values drawn from [-0.5, 0.5), rescaled so
    that the spectral radius of the returned matrix equals SRS.
    :param resSize: size of reservoir
    :param SRS: spectral radius scale factor, by default 1
    :return: W matrix of shape (resSize, resSize)
    """
    weights = random.uniform(low=-0.5, high= 0.5, size = (resSize,resSize))
    # dividing by the current radius brings it to 1, SRS then sets the target
    scale = SRS/getSpectralRadius(weights)
    weights *= scale
    return weights

### Function to get X and Yt matrix

In [11]:
def generateX_YtMatrix(data,inSize, resSize,initLen):
    """
    Allocate the X matrix (activation states) and build the Yt matrix
    (prediction targets).
    :param data: normalized data, indexed with ``data[None, k:]`` so it must support that form of indexing
    :param inSize: size of the input
    :param resSize: size of reservoir
    :param initLen: length of initial data
    :return: X, a zero matrix of shape (1+inSize+resSize, len(data)-initLen-1),
             and Yt, the values of data from position initLen+1 on as a single row
    """
    firstTarget = initLen + 1
    rows = 1 + inSize + resSize
    columns = len(data) - firstTarget
    X = zeros((rows, columns))
    Yt = data[None, firstTarget:]
    return X,Yt

### Function to generate Win,W, X and Yt Matrix

In [12]:
def generateMatrix(data,resSize,initLen = 12,inSize=1, WinScaleFactor=1, SRS = 1):
    """
    Generate the Win, W, X and Yt matrices in one call.

    Win is drawn before W, so the order of the random draws is fixed.
    :param data: values of the training Time Serie
    :param resSize: size of reservoir (num of neurons)
    :param initLen: number of initial values left out of the X and Yt matrices, by default = 12
    :param inSize: size of the input, by default = 1
    :param WinScaleFactor: scale factor for Win matrix, by default = 1
    :param SRS: Spectral radius scale factor for W matrix, by default = 1
    :return: Win,W,X,Yt
    """
    Win = generateWinMatrix(inSize, resSize, scaleFactor=WinScaleFactor)
    W = generateW(resSize, SRS=SRS)
    X, Yt = generateX_YtMatrix(data, inSize, resSize, initLen)
    return Win, W, X, Yt

## Get Activation states function

In [13]:
def getActivationStates(data,resSize,Win,W,a,X,initLen=12):
    """
    Drive the reservoir with the data and collect its activation states.

    Every value of data except the last one is fed as input. X is filled in
    place (and also returned): from step initLen on, each column holds the
    constant 1, the input and the reservoir state of that step.
    :param data: train data
    :param resSize: reservoir size
    :param Win: W in matrix
    :param W: W matrix
    :param a: leaking rate
    :param X: matrix of activations per input, written in place
    :param initLen: number of initial steps not stored in X, by default is 12
    :return: last activation vector, matrix of activations X
    """
    state = zeros((resSize,1))
    lastStep = len(data) - 1
    for step in range(lastStep):
        u = data[step]
        # tanh activation (the original notebook kept a sigmoid variant commented out)
        activation = tanh( dot( Win, vstack((1,u)) ) + dot( W, state ) )
        # leaky integration of the previous state and the new activation
        state = (1-a)*state + a*activation
        if step < initLen:
            continue
        X[:,step-initLen] = vstack((1,u,state))[:,0]
    return state,X

## Train Output function

In [14]:
def trainOutput(inSize,resSize,X,Yt,reg = 1e-8):
    """
    Train the readout with ridge regression to address overfitting:
    Wout = Yt X^T (X X^T + reg*I)^-1
    :param inSize: size of input (kept for backward compatibility, see below)
    :param resSize: reservoir size (kept for backward compatibility, see below)
    :param X: Matrix of activations, one column per training step
    :param Yt: target values
    :param reg: penalization parameter, by default is 1e-8
    :return Wout: weights from reservoir and input to output layer

    The identity matrix of the penalty is sized from X itself (its number of
    rows), so it always matches X X^T even if inSize/resSize are stale.
    """
    X_T = X.T
    gram = np.dot(X, X_T) + reg*np.eye(X.shape[0])
    # Solve the linear system instead of forming the explicit inverse, which
    # is less accurate when X X^T is badly conditioned. gram is symmetric,
    # hence Yt X^T gram^-1 == solve(gram, X Yt^T)^T.
    Wout = np.linalg.solve(gram, np.dot(X, Yt.T)).T
    return Wout

## Get Predictions

In [15]:
def getPredictions(data,outSize,a,Win,W,Wout,x,testLen=6):
    """
    Produce testLen predictions (still on the normalized scale) in generative
    mode: the first input is the last value of data and every prediction is
    fed back as the next input.
    :param data: train data, only its last value is read
    :param outSize: output size
    :param a: leaking rate
    :param Win: W in matrix
    :param W: W matrix
    :param Wout: W out matrix
    :param x: last activation states of the reservoir
    :param testLen: number of predictions, by default = 6
    :return: matrix of predictions with shape (outSize, testLen)
    """
    forecasts = zeros((outSize,testLen))
    u = data[-1]
    for step in range(testLen):
        activation = tanh( dot( Win, vstack((1,u))) + dot( W, x ))
        x = (1-a)*x + a*activation
        # generative mode: the prediction becomes the input of the next step
        # (a predictive mode would read the true next value here instead)
        u = dot( Wout, vstack((1,u,x)) )
        forecasts[:,step] = u
    return forecasts

In [88]:
def createForecastTS(data,lastDate,freq='D'):
    """
    Create a TS for forecast values, indexed with the periods that follow
    lastDate at the given frequency.
    :param data: forecast values
    :param lastDate: last date in the index of the observed TS (a timestamp, or
                     anything ``pd.Timestamp`` accepts)
    :param freq: frequency of the TS, by default = 'D'
    :return: Forecast Time Serie
    """
    lastDate = pd.Timestamp(lastDate)
    # Build the date grid from lastDate with one spare period instead of
    # adding a fixed day: a one-day shift is only right for some frequencies
    # (it leaves a gap for hourly data and misaligns e.g. '2D').
    index_ = pd.date_range(lastDate,periods=len(data) + 1,freq=freq)
    if index_[0] == lastDate:
        # lastDate lies on the grid: drop it, the forecast starts one period later
        index_ = index_[1:]
    else:
        # date_range already rolled forward to the next period after lastDate
        index_ = index_[:-1]
    TS = pd.Series(data,index = index_)
    return TS

### Function to Run test predictions from file or from Data

In [90]:
def TestESNfromFile(file='',setup=(10,1,1,0.3,1e-8),inSize=1,outSize=1,initLen=12,startDate = "2001-01-01",freq='M',norm=True,testLen=6):
    """
    Function to test ESN with data from file: load the Time Serie, train the
    ESN on all of it and forecast the following periods.
    :param file: file to be read to get values; when empty the default file of getTimeSerieFromFile is used
    :param setup: list of values to setup the ESN (resSize,WinScale,SRS,Leak rate,L2 penalty), by default (10,1,1,0.3,1e-8)
    :param inSize: size of inputs, by default is 1
    :param outSize: size of outputs, by default is 1
    :param initLen: lenght of initial values
    :param startDate: date to begin the TS, must be in format (YYYY-mm-dd), by default (2001-01-01)
    :param freq: frequency of the TS
    :param norm: True if data must be normalized
    :param testLen: number of values to forecast, by default is 6
    :return: forecast Time Serie
    """
    resSize,winScale,SRS,a,l2 = setup
    if file:
        TS,mean,std = getTimeSerieFromFile(file,start_date=startDate,freq=freq,normalize=norm)
    else:
        # empty default: let the loader fall back to its own default file
        TS,mean,std = getTimeSerieFromFile(start_date=startDate,freq=freq,normalize=norm)
    TestTrainTS = getTestTrainTS(TS)
    values = TestTrainTS.values
    # Every setting is forwarded explicitly so that setup, inSize and initLen
    # really shape the network instead of the callee defaults.
    Win,W,X,Yt = generateMatrix(values,resSize,initLen=initLen,inSize=inSize,WinScaleFactor=winScale,SRS=SRS)
    x,X = getActivationStates(values,resSize,Win,W,a,X,initLen=initLen)
    Wout = trainOutput(inSize,resSize,X,Yt,reg=l2)
    Y = getPredictions(values,outSize,a,Win,W,Wout,x,testLen=testLen)
    if(norm):
        Y = denormalize(Y,mean,std)
    forecastTS = createForecastTS(Y[0],TS.index[-1],freq)
    return forecastTS

In [92]:
def TestESNfromData(data,setup=(10,1,1,0.3,1e-8),inSize=1,outSize=1,initLen=12,startDate = "2001-01-01",freq='M',norm=True,testLen=6):
    """
    Function to test ESN with data: build the Time Serie from the values,
    train the ESN on all of it and forecast the following periods.
    :param data: data to be processed
    :param setup: list of values to setup the ESN (resSize,WinScale,SRS,Leak rate,L2 penalty), by default (10,1,1,0.3,1e-8)
    :param inSize: size of inputs, by default is 1
    :param outSize: size of outputs, by default is 1
    :param initLen: lenght of initial values
    :param startDate: date to begin the TS, must be in format (YYYY-mm-dd), by default (2001-01-01)
    :param freq: frequency of the TS
    :param norm: True if data must be normalized
    :param testLen: number of values to forecast, by default is 6
    :return: forecast Time Serie
    """
    resSize,winScale,SRS,a,l2 = setup
    TS,mean,std = getTimeSerieFromData(data,start_date=startDate,freq=freq,normalize=norm)
    TestTrainTS = getTestTrainTS(TS)
    values = TestTrainTS.values
    # Every setting is forwarded explicitly so that setup, inSize and initLen
    # really shape the network instead of the callee defaults.
    Win,W,X,Yt = generateMatrix(values,resSize,initLen=initLen,inSize=inSize,WinScaleFactor=winScale,SRS=SRS)
    x,X = getActivationStates(values,resSize,Win,W,a,X,initLen=initLen)
    Wout = trainOutput(inSize,resSize,X,Yt,reg=l2)
    Y = getPredictions(values,outSize,a,Win,W,Wout,x,testLen=testLen)
    if(norm):
        Y = denormalize(Y,mean,std)
    forecastTS = createForecastTS(Y[0],TS.index[-1],freq)
    return forecastTS

# Tests begin

In [16]:
TS,mean,std = getTimeSerie(freq='M')

In [17]:
TestTrainTS = getTestTrainTS(TS)

In [89]:
ValTrainTS,ValTestTS = getValidationTrainAndTestTS(TS)

In [18]:
Win,W,X,Yt = generateMatrix(TestTrainTS.values,10)

In [20]:
x,X = getActivationStates(TestTrainTS.values,10,Win,W,0.3,X)

In [21]:
Wout = trainOutput(1,10,X,Yt)

In [32]:
Y = getPredictions(TestTrainTS.values,1,0.3,Win,W,Wout,x)

In [33]:
Y = denormalize(Y,mean,std)

In [34]:
Y

array([[ 0.21599055,  0.23255586,  0.26767315,  0.21728375,  0.15894693,
         0.1108751 ]])