In [None]:
import numpy as np
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tools as st
import pandas as pd

In [None]:
def compute_W_sad(phi, psi, eta_phi, eta_psi, x, a, z, xx, max_iter, W_0, K_0):
    """Estimate W^{sad} with one stochastic update per sample.

    At every step ``t`` the features of sample ``t`` are evaluated and both
    ``W`` and the auxiliary matrix ``K`` are updated simultaneously from
    their previous values.

    Parameters
    ----------
    phi : function
        Feature map called as ``phi(x[:, t], a[:, t])``; its result is
        wrapped in ``np.matrix`` and used as a column of length d_{phi}.
    psi : function
        Feature map called as ``psi(x[:, t], z[:, t])``; its result is
        wrapped in ``np.matrix`` and used as a column of length d_{psi}.
    eta_phi : array
        Step sizes for the W update, read as ``eta_phi[t]``.
    eta_psi : array
        Step sizes for the K update, read as ``eta_psi[t]``.
    x : array
        State samples; column ``t`` is the state of sample ``t``.
    a : array
        Action samples; column ``t`` is the action of sample ``t``.
    z : array
        Instrumental-variable samples, one column per sample.
    xx : array
        Next-state samples, one column per sample.
    max_iter : int
        Number of updates; samples ``0 .. max_iter - 1`` are consumed.
    W_0 : array
        Initial guess for W, expected shape ``(d_{x}, d_{phi})``. Not modified.
    K_0 : array
        Initial guess for K, expected shape ``(d_{x}, d_{psi})``. Not modified.

    Output
    --------
    W_est : array
        Estimate of W^{sad} with the shape of ``W_0``. When ``max_iter`` is 0
        this is simply a copy of ``W_0``.
    """
    W_est = W_0.copy()
    K_est = K_0.copy()

    for step in range(max_iter):
        feat_phi = np.matrix(phi(x[:, step], a[:, step]))
        feat_psi = np.matrix(psi(x[:, step], z[:, step]))
        phi_row = np.transpose(feat_phi)
        psi_row = np.transpose(feat_psi)

        # Direction for W: K * Psi * Phi^T.
        w_direction = np.dot(K_est, np.dot(feat_psi, phi_row))
        # Direction for K: K * Psi * Psi^T + x' * Psi^T - W * Phi * Psi^T.
        k_direction = (
            np.dot(K_est, np.dot(feat_psi, psi_row))
            + np.dot([xx[:, step]], psi_row)
            - np.dot(W_est, np.dot(feat_phi, psi_row))
        )

        # Both right-hand sides are evaluated with the previous W and K
        # before either name is rebound.
        W_est, K_est = (
            W_est - eta_phi[step] * w_direction,
            K_est + eta_psi[step] * k_direction,
        )

    return W_est


# Use mini batch stochastic gradient descent.
def compute_W_sad2(phi, psi, eta_phi, eta_psi, x, a, z, xx, max_iter, W_0, K_0, size, feature_size, choose, inner_iter=150):

    """ Compute W^{sad} with mini-batch stochastic gradient updates.

    The samples are consumed in consecutive batches of ``size`` columns.
    For each batch the empirical moment matrices

        A = mean(Psi Phi^T),  B = mean(Psi Psi^T),  C = mean(x' Psi^T)

    are formed, K is refined with ``inner_iter`` steps while W is held
    fixed, and then W takes a single step using the refined K.

    Parameters
    ----------
    phi : function
        Feature map, called as ``phi(np.array([x[:, t]]), np.array([a[:, t]]))``.
        Must return a column of ``feature_size`` entries.
    psi : function
        Feature map, called as ``psi(np.array([x[:, t]]), np.array([z[:, t]]))``.
        Must return a column of ``feature_size`` entries.
    eta_phi : array
        Step sizes for W, read as ``eta_phi[i]`` where ``i`` is the batch index.
    eta_psi : array
        Step sizes for K, read as ``eta_psi[j]`` for ``j`` in
        ``range(inner_iter)``; the schedule restarts for every batch, so at
        least ``inner_iter`` entries are required.
    x : array
        State samples, one column per sample.
    a : array
        Action samples, one column per sample.
    z : array
        Instrumental-variable samples, one column per sample.
    xx : array
        Next-state samples, one column per sample.
    max_iter : int
        Number of samples to use; ``int(max_iter / size)`` full batches are
        processed and any remainder is ignored.
    W_0 : array
        Initial guess for W, expected shape ``(1, feature_size)``. Not modified.
    K_0 : array
        Initial guess for K, expected shape ``(1, feature_size)``. Not modified.
    size : int
        Mini-batch size.
    feature_size : int
        Length of the phi and psi feature vectors (A and B are allocated as
        ``feature_size x feature_size`` and C as ``1 x feature_size``).
    choose : int
        Not used by the computation; kept so existing callers keep working.
    inner_iter : int, optional
        Number of K updates performed per batch. Defaults to 150, the value
        that was previously hard-coded.

    Output
    --------
    W_now : array
        It's an estimation matrix for the true W_sad with the shape of ``W_0``.
    """

    W_now = W_0.copy()
    K_now = K_0.copy()

    for i in range(int(max_iter / size)):
        A = np.zeros((feature_size, feature_size))
        B = np.zeros((feature_size, feature_size))
        C = np.zeros((1, feature_size))
        first = i * size
        for t in range(first, first + size):
            Phi = phi(np.array([x[:, t]]), np.array([a[:, t]]))
            Psi = psi(np.array([x[:, t]]), np.array([z[:, t]]))
            Psi_T = np.transpose(Psi)
            A = A + 1 / size * np.dot(Psi, np.transpose(Phi))
            B = B + 1 / size * np.dot(Psi, Psi_T)
            C = C + 1 / size * np.dot(np.array([xx[:, t]]), Psi_T)

        # Feature maps may hand back object-dtype arrays; force plain floats
        # before the repeated matrix products below.
        A = np.asarray(A, dtype=np.float64)
        B = np.asarray(B, dtype=np.float64)
        C = np.asarray(C, dtype=np.float64)

        # W is fixed during the inner loop, so W A^T is computed once.
        W_A_T = np.dot(W_now, np.transpose(A))
        for j in range(inner_iter):
            K_now = K_now + eta_psi[j] * (np.dot(K_now, B) + C - W_A_T)
        W_now = W_now - eta_phi[i] * np.dot(K_now, A)

    return W_now


def phi(x,a):

    """
    The polynomial feature map [1, x, a, x^2, a^2, x^3, a^3] as a column.

    The inputs are flattened first, so a state/action given as a scalar, as
    shape ``(d,)`` or as shape ``(1, d)`` / ``(d, 1)`` all produce the same
    regular numeric column. Building the column with a nested list literal
    produced a ragged array for 2-D inputs, which recent NumPy versions
    reject with a ValueError.

    :param x: a state. The shape is ''(d_{x}, 1)'' (any layout is flattened)
    :param a: an action. The shape is ''(d_{a}, 1)'' (any layout is flattened)
    :return: array of shape ''(1 + 3 d_{x} + 3 d_{a}, 1)''; ''(7, 1)'' for a
        scalar state and action
    """
    x = np.asarray(x).reshape(-1)
    a = np.asarray(a).reshape(-1)
    features = np.concatenate(([1], x, a, x**2, a**2, x**3, a**3))
    return features.reshape(-1, 1)

def psi(x,z):

    """
    The polynomial feature map [1, x, z, x^2, z^2, x^3, z^3] as a column.

    The inputs are flattened first, so values given as a scalar, as shape
    ``(d,)`` or as shape ``(1, d)`` / ``(d, 1)`` all produce the same regular
    numeric column. Building the column with a nested list literal produced
    a ragged array for 2-D inputs, which recent NumPy versions reject with a
    ValueError.

    :param x: a state. The shape is ''(d_{x}, 1)'' (any layout is flattened)
    :param z: an instrumental variable. The shape is ''(d_{z}, 1)'' (any
        layout is flattened)
    :return: array of shape ''(1 + 3 d_{x} + 3 d_{z}, 1)''; ''(7, 1)'' for a
        scalar state and instrument
    """
    x = np.asarray(x).reshape(-1)
    z = np.asarray(z).reshape(-1)
    features = np.concatenate(([1], x, z, x**2, z**2, x**3, z**3))
    return features.reshape(-1, 1)

def generate_action(x,z,e):

    """
    Draw the behaviour action used while collecting data.

    The action is a Gaussian sample centred at ``z + e`` with standard
    deviation 0.5, clipped to the interval [-1, 1]. The state ``x`` is
    accepted for interface compatibility but does not influence the draw.
    """

    centre = z + e
    raw_action = np.random.normal(centre, 0.5)
    # Disabled alternative: an unclipped draw, np.random.normal(z+e,1).
    return np.clip(raw_action, -1, 1)

def F(x, a):

    """
    Deterministic transition function that the estimators try to recover.

    Returns ``log(|x - 1| + 1) - a^3 / 2``, evaluated element-wise when the
    inputs are arrays.
    """

    state_part = np.log(np.abs(x - 1) + 1)
    action_part = 0.5 * a ** 3
    return state_part - action_part


In [None]:
sigma_e_list = [1, 1, 1]
sigma_z_list = [0.8, 0.9, 1.5]

# One experiment per (sigma_e, sigma_z) pair: simulate data, compute the
# closed-form estimate, run the mini-batch solver and compare with plain OLS.
for choose in range(3):
    sigma_e = sigma_e_list[choose]
    sigma_z = sigma_z_list[choose]

    # Data collection
    max_iter = 80000
    size = 256
    horizon = 1000
    feature_size = 7

    # Sample 0 plus one sample per loop step. The buffers are allocated once
    # instead of being re-copied by np.hstack on every step.
    n_samples = max_iter + 1
    x = np.zeros((1, n_samples))   # states (the initial state is 0)
    z = np.zeros((1, n_samples))   # instrumental variables
    e = np.zeros((1, n_samples))   # noise terms
    a = np.zeros((1, n_samples))   # actions
    xx = np.zeros((1, n_samples))  # next states

    # Random draws happen in the order z, e, action for every sample.
    z[0, 0] = np.random.normal(loc=0, scale=sigma_z)
    e[0, 0] = np.random.normal(loc=0, scale=sigma_e)
    a[:, 0] = generate_action(x[:, 0], z[:, 0], e[:, 0])  # get the action
    x_next = F(x[:, 0], a[:, 0]) + e[:, 0]  # get the next state
    xx[:, 0] = x_next  # store the next state

    # Collecting the data step by step
    for i in range(max_iter):
        if i % horizon == 0:
            # Start of a new episode: the state is reset to 0.
            x_next = np.zeros(1)
        x[:, i + 1] = x_next
        z[0, i + 1] = np.random.normal(loc=0, scale=sigma_z)
        e[0, i + 1] = np.random.normal(loc=0, scale=sigma_e)
        a[:, i + 1] = generate_action(x[:, i + 1], z[:, i + 1], e[:, i + 1])
        x_next = F(x[:, i + 1], a[:, i + 1]) + e[:, i + 1]
        xx[:, i + 1] = x_next

    # Empirical moments over the first max_iter samples:
    # A = mean(Psi Phi^T), B = mean(Psi Psi^T), C = mean(x' Psi^T).
    A = np.zeros((feature_size, feature_size))
    B = np.zeros((feature_size, feature_size))
    C = np.zeros((1, feature_size))
    for i in range(max_iter):
        Phi = phi(np.array([x[:, i]]), np.array([a[:, i]]))
        Psi = psi(np.array([x[:, i]]), np.array([z[:, i]]))
        A = A + 1 / max_iter * np.dot(Psi, np.transpose(Phi))
        B = B + 1 / max_iter * np.dot(Psi, np.transpose(Psi))
        C = C + 1 / max_iter * np.dot(np.array([xx[:, i]]), np.transpose(Psi))

    # Compute Closed Form
    # The closed form for W^{*} is CB^{-1}A(A^{T}B^{-1}A)^{-1}
    # use sample to estimate covariance matrices
    A = np.asarray(A, dtype=np.float64)
    B = np.asarray(B, dtype=np.float64)
    C = np.asarray(C, dtype=np.float64)
    B_inv = np.linalg.inv(B)  # inverted once and reused below
    W_front = np.dot(np.dot(C, B_inv), A)  # compute CB^{-1}A
    W_back = np.dot(np.dot(np.transpose(A), B_inv), A)  # compute A^{T}B^{-1}A
    W_star = np.dot(W_front, np.linalg.inv(W_back))
    # Eigen-decomposition of A^{T}B^{-1}A.
    # NOTE(review): W_star and strength are computed but never printed here.
    strength = np.linalg.eig(W_back)

    # Compute W by using Gradient Descent
    W_0 = np.zeros((1, feature_size))  # initial guess
    K_0 = np.zeros((1, feature_size))  # initial guess
    # setup for stepsize (eta_psi is negative, eta_phi positive)
    steps = np.arange(max_iter)
    eta_psi = -1 / (100 + 5 * steps)
    eta_phi = 1 / (25 + steps)
    W_sad = compute_W_sad2(phi, psi, eta_phi, eta_psi, x, a, z, xx, max_iter, W_0, K_0, size, feature_size, choose)
    print(W_sad)

    print('Ordinary Regression Result: ')
    Y = pd.DataFrame(np.transpose(xx))
    variable = np.vstack((x, a, x**2, a**2, x**3, a**3))
    variable = np.transpose(variable)
    X = pd.DataFrame(variable)
    X = st.tools.add_constant(X)
    result = sm.OLS(Y, X).fit()
    # Labels of the fitted parameters: the added constant followed by the
    # integer column names of X. Named so the builtin `list` is not shadowed.
    param_labels = ['const', 0, 1, 2, 3, 4, 5, 6, 7, 8, 9]
    regression = np.zeros((1, feature_size))
    for i in range(feature_size):
        regression[0, i] = result.params[param_labels[i]]
    print(regression)

