In [None]:
import numpy as np
import scipy.stats as spst
import matplotlib.pyplot as plt
import statsmodels.api as sm
import statsmodels.tools as st
import pandas as pd
from scipy.linalg import svd


In [None]:
def compute_W_sad(phi, psi, eta_phi, eta_psi, x, a, z, xx, max_iter, W_0, K_0):

    """Estimate W^{sad} with one-sample-per-step stochastic gradient updates.

    At step ``t`` the sample ``(x_t, a_t, z_t, xx_t)`` gives the column
    features ``Phi = phi(x_t, a_t)`` and ``Psi = psi(x_t, z_t)``, and both
    iterates are updated simultaneously from their previous values:

        W <- W - eta_phi[t] * K (Psi Phi^T)
        K <- K + eta_psi[t] * (K (Psi Psi^T) + xx_t Psi^T - W (Phi Psi^T))

    Parameters
    ----------
    phi : function
        Feature map ``phi(x, a)``. It is called with column vectors of shape
        ``(d_x, 1)`` and ``(d_a, 1)``; its result is flattened into a
        ``(d_phi, 1)`` column.
    psi : function
        Feature map ``psi(x, z)``. It is called with column vectors of shape
        ``(d_x, 1)`` and ``(d_z, 1)``; its result is flattened into a
        ``(d_psi, 1)`` column.
    eta_phi : array
        Step sizes of the W update, indexed as ``eta_phi[t]``
        (at least ``max_iter`` entries).
    eta_psi : array
        Step sizes of the K update, indexed as ``eta_psi[t]``
        (at least ``max_iter`` entries). Its sign is applied as given.
    x : array
        State samples, shape ``(d_x, T)`` with ``T >= max_iter``.
    a : array
        Action samples, shape ``(d_a, T)``.
    z : array
        Instrumental-variable samples, shape ``(d_z, T)``.
    xx : array
        Next-state samples, shape ``(d_x, T)``.
    max_iter : int
        Number of iterations; sample ``t`` is consumed at iteration ``t``.
    W_0 : array
        Initial guess for W, shape ``(d_x, d_phi)``. It is not modified.
    K_0 : array
        Initial guess for K, shape ``(d_x, d_psi)``. It is not modified.

    Output
    --------
    W_now : array
        Estimate of W_sad with shape ``(d_x, d_phi)``. Intermediate results
        are plain ``numpy.ndarray`` objects (no ``numpy.matrix`` wrapping).
    """

    W_now = W_0.copy()
    K_now = K_0.copy()

    for t in range(max_iter):
        # Slice with a list index so the feature maps receive (d, 1) columns,
        # which is what the vstack-based feature maps of this notebook expect.
        Phi = np.asarray(phi(x[:, [t]], a[:, [t]]), dtype=np.float64).reshape(-1, 1)
        Psi = np.asarray(psi(x[:, [t]], z[:, [t]]), dtype=np.float64).reshape(-1, 1)
        # The next state must be a column so that xx_t Psi^T is (d_x, d_psi).
        next_state = np.asarray(xx[:, t], dtype=np.float64).reshape(-1, 1)

        psi_phi = np.dot(Psi, np.transpose(Phi))  # (d_psi, d_phi)

        # Both updates read the *previous* W and K.
        W_next = W_now - eta_phi[t] * np.dot(K_now, psi_phi)
        K_next = K_now + eta_psi[t] * (
            np.dot(K_now, np.dot(Psi, np.transpose(Psi)))
            + np.dot(next_state, np.transpose(Psi))
            - np.dot(W_now, np.transpose(psi_phi))
        )
        W_now, K_now = W_next, K_next

    return W_now


# Use mini batch stochastic gradient descent.
def compute_W_sad2(phi, psi, eta_phi, eta_psi, x, a, z, xx, max_iter, W_0, K_0, size, choose, inner_iter=200):

    """Estimate W^{sad} with mini-batch stochastic gradient updates.

    The first ``(max_iter // size) * size`` samples are split into consecutive
    batches of ``size`` samples. For batch ``i`` the batch averages

        A = mean(Psi Phi^T),  B = mean(Psi Psi^T),  C = mean(xx Psi^T)

    are formed, K is refined by ``inner_iter`` steps

        K <- K + eta_psi[j] * (K B + C - W A^T),   j = 0 .. inner_iter - 1

    (warm-started from the K of the previous batch), and then W takes one step

        W <- W - eta_phi[i] * K A.

    All matrix sizes are taken from ``W_0`` and ``K_0``, so the function does
    not depend on any module-level variable.

    Parameters
    ----------
    phi : function
        Feature map ``phi(x, a)``, called with ``(d_x, 1)`` and ``(d_a, 1)``
        columns; its result is flattened into a ``(d_phi, 1)`` column.
    psi : function
        Feature map ``psi(x, z)``, called with ``(d_x, 1)`` and ``(d_z, 1)``
        columns; its result is flattened into a ``(d_psi, 1)`` column.
    eta_phi : array
        Step sizes of the W update, indexed by batch number.
    eta_psi : array
        Step sizes of the K update, indexed by inner step ``0 .. inner_iter-1``
        (the same entries are reused for every batch). Sign is applied as given.
    x : array
        State samples, shape ``(d_x, T)``.
    a : array
        Action samples, shape ``(d_a, T)``.
    z : array
        Instrumental-variable samples, shape ``(d_z, T)``.
    xx : array
        Next-state samples, shape ``(d_x, T)``.
    max_iter : int
        Number of samples to consume; a trailing incomplete batch is ignored.
    W_0 : array
        Initial guess for W, shape ``(d_x, d_phi)``. It is not modified.
    K_0 : array
        Initial guess for K, shape ``(d_x, d_psi)``. It is not modified.
    size : int
        Mini-batch size.
    choose : int
        Unused; kept so existing calls keep working.
    inner_iter : int, optional
        Number of K steps per batch (default 200, the previously fixed value).

    Output
    --------
    W_now : array
        Estimate of W_sad with shape ``(d_x, d_phi)``.
    """

    W_now = W_0.copy()
    K_now = K_0.copy()

    for i in range(max_iter // size):
        start, stop = i * size, (i + 1) * size

        # One feature column per sample of the batch: (d_phi, size) / (d_psi, size).
        Phi_batch = np.hstack([
            np.asarray(phi(x[:, [t]], a[:, [t]]), dtype=np.float64).reshape(-1, 1)
            for t in range(start, stop)
        ])
        Psi_batch = np.hstack([
            np.asarray(psi(x[:, [t]], z[:, [t]]), dtype=np.float64).reshape(-1, 1)
            for t in range(start, stop)
        ])
        next_states = np.asarray(xx[:, start:stop], dtype=np.float64)

        # Batch averages of the outer products.
        A = np.dot(Psi_batch, np.transpose(Phi_batch)) / size    # (d_psi, d_phi)
        B = np.dot(Psi_batch, np.transpose(Psi_batch)) / size    # (d_psi, d_psi)
        C = np.dot(next_states, np.transpose(Psi_batch)) / size  # (d_x, d_psi)

        # Inner loop on K with W frozen; K carries over between batches.
        W_A_t = np.dot(W_now, np.transpose(A))
        for j in range(inner_iter):
            K_now = K_now + eta_psi[j] * (np.dot(K_now, B) + C - W_A_t)

        W_now = W_now - eta_phi[i] * np.dot(K_now, A)

    return W_now


def phi(x,a):

    """
    Feature map of a (state, action) pair: an intercept stacked on top of the
    state and the action.
    :param x: a state. The shape is ''(d_{x}, 1)''
    :param a: an action. The shape is ''(d_{a}, 1)''
    :return: the rows ''[1; x; a]'' stacked vertically, shape ''(1 + d_{x} + d_{a}, 1)''
    """
    intercept = np.array([[1]])
    pieces = (intercept, np.atleast_2d(x), np.atleast_2d(a))
    return np.concatenate(pieces, axis=0)


def psi(x,z):

    """
    Feature map of a (state, instrument) pair: an intercept stacked on top of
    the state and the instrumental variable.
    :param x: a state. The shape is ''(d_{x}, 1)''
    :param z: an instrumental variable. The shape is ''(d_{z}, 1)''
    :return: the rows ''[1; x; z]'' stacked vertically, shape ''(1 + d_{x} + d_{z}, 1)''
    """
    intercept = np.array([[1]])
    pieces = (intercept, np.atleast_2d(x), np.atleast_2d(z))
    return np.concatenate(pieces, axis=0)


def generate_action(x,z,e):

    """
    A function to generate action while collecting data.

    Component ``i`` of the action is set to 1 with probability
    ``1 / (1 + exp(-2 * z[i] - 0.1 * x[i] + 0.5 * e[i]))`` and to 0 otherwise,
    using one uniform draw from the global NumPy random state per component
    (drawn in index order).

    :param x: state, 1-D or a column vector
    :param z: instrumental variable, 1-D or a column vector
    :param e: noise term, 1-D or a column vector
    :return: 1-D float array of zeros and ones with as many entries as ``z``;
             only the first ``len(x)`` entries are sampled, any remaining
             entries stay 0
    :raises IndexError: if ``x`` has more entries than ``z`` or ``e``
    """

    # Flatten so that 1-D vectors and (n, 1) columns are handled alike and no
    # 1-element array has to be converted to a Python float.
    x_vec = np.ravel(x)
    z_vec = np.ravel(z)
    e_vec = np.ravel(e)

    n_draws = x_vec.size
    if n_draws > min(z_vec.size, e_vec.size):
        raise IndexError("x has more entries than z or e")

    a = np.zeros(z_vec.size)
    accept_prob = 1 / (1 + np.exp(-2 * z_vec[:n_draws] - 0.1 * x_vec[:n_draws] + 0.5 * e_vec[:n_draws]))
    # rand(n) consumes the same uniforms as n consecutive rand() calls.
    a[:n_draws] = np.random.rand(n_draws) < accept_prob

    return a

def generate_IV(x,S,V,choose):

    """
    A function to generate IV.

    Draws one multinomial sample of 10 trials over 10 categories from the
    global NumPy random state; ``choose`` selects the category probabilities.

    :param x: unused, kept for call compatibility
    :param S: unused, kept for call compatibility
    :param V: unused, kept for call compatibility
    :param choose: 0 (uniform), 1 or 2 (increasingly concentrated on the first
                   categories)
    :return: 1-D integer array of 10 counts that sum to 10
    :raises ValueError: if ``choose`` is not 0, 1 or 2
    """

    category_probs = {
        0: [1/10.]*10,
        1: [1/5,1/5,1/10,1/10,1/10,1/10,1/20,1/20,1/20,1/20],
        2: [1/5,1/5,1/5,1/5,1/30,1/30,1/30,1/30,1/30,1/30],
    }
    if choose not in category_probs:
        # Previously an unknown value fell through and failed on an unbound local.
        raise ValueError("choose must be 0, 1 or 2, got %r" % (choose,))

    z = np.random.multinomial(10, category_probs[choose], size=1)

    return z.flatten()

def F(x, a, movie,S):

    """
    Deterministic transition to be approximated: the state shifted by the
    movie matrix applied to the action, i.e. ``x + movie . a``.
    The argument ``S`` is accepted but not used.
    """

    shift = np.dot(movie, a)
    return x + shift

In [None]:
# Read the ratings table (one rating per row, fields separated by '::').
# The file has no header row, so the column names are supplied here; the
# multi-character separator is why the python parsing engine is requested.
data = pd.read_csv(
    'ml-1m/ratings.dat',
    sep='::',
    engine='python',
    names=['user_id', 'movie_id', 'rating', 'time'],
)

In [None]:
# Dense user x movie rating matrix; ids are 1-based in the data, hence the -1.
# Entries without a rating stay 0.
ratings_mat = np.zeros((np.max(data.user_id.values), np.max(data.movie_id.values)), dtype=np.uint8)
# One vectorised assignment instead of a Python loop over every rating row.
# If a (user, movie) pair were repeated, the last occurrence wins, as in a loop.
ratings_mat[data.user_id.values - 1, data.movie_id.values - 1] = data.rating.values

In [None]:
def do_svd(mat, k=0, option=False):
    """Truncated singular value decomposition of ``mat``.

    Parameters
    ----------
    mat : array
        2-D matrix of shape ``(m, n)``.
    k : int
        Number of leading singular triplets to keep. Note that the default
        ``k=0`` keeps nothing and yields empty arrays.
    option : bool
        If True, return only the truncated singular values.

    Returns
    -------
    ``Sigma`` if ``option`` is True, otherwise ``(U, Sigma, V)`` where ``U`` is
    the first ``k`` left singular vectors (as columns), ``Sigma`` the first
    ``k`` singular values (1-D, descending) and ``V`` the first ``k`` *rows* of
    the right factor as returned by ``scipy.linalg.svd`` (i.e. V^H), so that
    ``U @ diag(Sigma) @ V`` is the rank-``k`` approximation of ``mat``.
    """
    # The economy-size decomposition already contains the leading min(m, n)
    # triplets, so the full square U and V are only needed when the slices
    # below would reach beyond them.
    thin_is_enough = k is not None and 0 <= k <= min(np.shape(mat))
    U, Sigma, V = svd(mat, full_matrices=not thin_is_enough)
    U = U[:,:k]
    V = V[:k,:]
    Sigma = Sigma[:k]
    if option:
        return Sigma
    return U, Sigma, V

In [None]:
# Keep only the leading `dimension` singular triplets of the ratings matrix.
dimension = 10
U, S, V = do_svd(ratings_mat, dimension)
# do_svd returns the singular values as a 1-D vector; expand it to a diagonal matrix.
S = np.diag(S)

In [None]:
# ---- Experiment configuration (identical for every instrument distribution) ----
# These names stay at module level on purpose: other cells may read them.
# NOTE(review): no random seed is set in the visible cells, so the numbers
# printed below change from run to run.
state_size = 10      # dimension of the state vector
movie_size = 10      # number of movies = dimension of the action and of the instrument
feature_size = 21    # 1 (intercept) + state_size + movie_size
horizon = 5          # number of consecutive steps simulated per user
max_iter = 5*2000    # number of samples used by the estimators
size = 32            # mini-batch size for compute_W_sad2
Mean = np.zeros(dimension)
Identity = np.eye(dimension)
movie = V[:,:movie_size]

for choose in range(3):
    # ------------------------------------------------------------------
    # 1. Simulate max_iter + 1 transitions (x, z, e, a) -> xx.
    #    Columns are collected in lists and stacked once at the end, which
    #    avoids re-copying the growing arrays on every step. The order of
    #    the random draws per step is: (state noise on reset), z, e, a.
    # ------------------------------------------------------------------
    true_preference = U[0,:].reshape(-1, 1)
    x_now = true_preference + np.random.multivariate_normal(mean = Mean, cov = Identity).reshape(-1, 1)

    x_cols, z_cols, e_cols, a_cols, xx_cols = [], [], [], [], []
    for i in range(max_iter + 1):
        if i > 0 and i % horizon == 0:
            # Every `horizon` steps switch to the next user (next row of U)
            # and restart from a noisy copy of that user's preference.
            true_preference = U[i // horizon,:].reshape(-1, 1)
            x_now = true_preference + np.random.multivariate_normal(mean = Mean, cov = Identity).reshape(-1, 1)

        z_now = generate_IV(x_now[:, 0], S, movie, choose).reshape(-1, 1)
        e_now = np.random.multivariate_normal(mean = Mean, cov = Identity).reshape(-1, 1)
        a_now = np.asarray(generate_action(x_now[:, 0], z_now[:, 0], e_now[:, 0])).reshape(-1, 1)
        x_next = np.array(F(x_now, a_now, movie, S) + e_now)

        x_cols.append(x_now)
        z_cols.append(z_now)
        e_cols.append(e_now)
        a_cols.append(a_now)
        xx_cols.append(x_next)

        x_now = x_next  # the next state is the current state of the following step

    x = np.hstack(x_cols)    # states,       (state_size, max_iter + 1)
    z = np.hstack(z_cols)    # instruments,  (movie_size, max_iter + 1)
    e = np.hstack(e_cols)    # noise,        (state_size, max_iter + 1)
    a = np.hstack(a_cols)    # actions,      (movie_size, max_iter + 1)
    xx = np.hstack(xx_cols)  # next states,  (state_size, max_iter + 1)

    # ------------------------------------------------------------------
    # 2. Sample moment matrices over the first max_iter samples.
    # ------------------------------------------------------------------
    Phi_all = np.hstack([phi(x[:, [t]], a[:, [t]]) for t in range(max_iter)])
    Psi_all = np.hstack([psi(x[:, [t]], z[:, [t]]) for t in range(max_iter)])

    # use sample to estimate covariance matrices
    A = np.float64(np.dot(Psi_all, np.transpose(Phi_all)) / max_iter)       # mean of Psi Phi^T
    B = np.float64(np.dot(Psi_all, np.transpose(Psi_all)) / max_iter)       # mean of Psi Psi^T
    C = np.float64(np.dot(xx[:, :max_iter], np.transpose(Psi_all)) / max_iter)  # mean of xx Psi^T

    # Psi holds an intercept together with multinomial counts that always sum
    # to 10, so B is singular by construction; use the pseudo-inverse (with an
    # explicit cutoff) instead of a plain inverse.
    strength = np.linalg.eig(np.dot(np.dot(np.transpose(A), np.linalg.pinv(B, rcond=1e-10)), A))

    # ------------------------------------------------------------------
    # 3. Compute W by using Gradient Descent
    # ------------------------------------------------------------------
    W_0 = 0.5 * np.ones((state_size,1+state_size+movie_size))  # initial guess
    K_0 = np.zeros((state_size,1+state_size+movie_size))  # initial guess
    # setup for stepsize (eta_psi is negative; the K update adds eta_psi * gradient)
    step_index = np.arange(max_iter)
    eta_psi = -1/(60+step_index)
    eta_phi = 1/(65+step_index)
    W_sad = compute_W_sad2(phi, psi, eta_phi, eta_psi, x, a, z, xx, max_iter, W_0, K_0, size, choose)
    print(W_sad)

    # ------------------------------------------------------------------
    # 4. Baseline: ordinary least squares of every next-state coordinate on
    #    (intercept, state, action), using all simulated samples.
    # ------------------------------------------------------------------
    print('Ordinary Regression ')
    variable = np.transpose(np.vstack((x,a)))
    X = st.tools.add_constant(pd.DataFrame(variable))
    # Coefficient labels in the column order of X: 'const', then 0 .. feature_size - 2.
    param_labels = ['const', *range(feature_size - 1)]
    ols_results = [sm.OLS(pd.DataFrame(xx[k]), X).fit() for k in range(state_size)]
    regression = np.vstack([fit.params.loc[param_labels].values for fit in ols_results])

    print(regression)
