In [4]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.linalg import svd
import numpy as np    
from torch.utils.data import Dataset, DataLoader
from time import perf_counter 
from torch.autograd import Variable
import torch.optim as optim
import math
from numpy import linalg as LA

In [5]:
# Dimensions shared by the networks and the simulated data defined below.
r1, r2, r3 = 2, 2, 2  # r1: output width of fc1; r2 / r3: output channels of conv1 / conv2
p = 3                 # width of every input matrix (kernel width of conv2)
N = 25                # height of every input matrix (kernel height of conv1, size of the network output)

In [6]:
### Construct a deep network for nonlinearity

class NetDW(nn.Module):
    """Two-branch network that applies conv1 and conv2 in both orders and averages the heads.

    Branch 1 runs conv1 (N x 1 kernel) first and conv2 (1 x p kernel) second;
    branch 2 runs them in the opposite order. Both branches share the same
    convolution weights and the same fc1 -> fc2 head, and both flatten their
    features in the same order (conv1 channel major, conv2 channel minor).

    Args:
        r1: output width of fc1 (input width of fc2).
        r2: number of output channels of conv1.
        r3: number of output channels of conv2.
        p: kernel width of conv2.
        N: kernel height of conv1 and size of the network output.
    """

    def __init__(self, r1, r2, r3, p, N):
        super(NetDW, self).__init__()
        self.r1 = r1
        self.r2 = r2
        self.r3 = r3
        self.p = p
        self.N = N
        self.conv1 = nn.Conv2d(1, r2, kernel_size=(N, 1), bias=False)
        self.conv2 = nn.Conv2d(1, r3, kernel_size=(1, p), bias=False)
        # Bias-free linear maps: r2*r3 features -> r1 -> N outputs.
        self.fc1 = nn.Linear(in_features=r2*r3, out_features=r1, bias=False)
        self.fc2 = nn.Linear(in_features=r1, out_features=N, bias=False)

    def forward(self, x):
        """Map a batch of shape (batch, 1, N, p) to predictions of shape (batch, N)."""
        y1 = F.relu(self.conv1(x))  # (batch, r2, 1, p)
        y2 = F.relu(self.conv2(x))  # (batch, r3, N, 1)

        # Branch 1 (first N, then p): conv2 is applied to every conv1 channel.
        # Concatenating on the channel axis gives feature index i2 * r3 + i3.
        z1 = torch.cat(
            [self.conv2(y1[:, i:(i+1), :, :]) for i in range(y1.shape[1])], dim=1
        )
        # Flatten only AFTER all channels are concatenated; doing it inside the
        # loop breaks as soon as there are more (or fewer) than two channels.
        z1 = F.relu(z1.view(-1, self.r2*self.r3))  # -1 infers the batch size

        # Branch 2 (first p, then N): conv1 is applied to every conv2 channel.
        # Concatenating on dim 2 yields (batch, r2, r3, 1), so the flattened
        # index is again i2 * r3 + i3 and matches branch 1.
        z2 = torch.cat(
            [self.conv1(y2[:, i:(i+1), :, :]) for i in range(y2.shape[1])], dim=2
        )
        z2 = F.relu(z2.view(-1, self.r2*self.r3))

        x1 = self.fc2(F.relu(self.fc1(z1)))
        x2 = self.fc2(F.relu(self.fc1(z2)))
        # Average the two branch predictions.
        return torch.mean(torch.stack([x1, x2]), dim=0)

class NetRelu(nn.Module):
    """Single-branch network: conv1 -> ReLU -> conv2 -> ReLU -> fc1 -> ReLU -> fc2.

    conv1 uses an N x 1 kernel with r2 output channels; conv2 uses a 1 x p
    kernel with r3 output channels and is applied separately (same weights)
    to each conv1 channel. fc1 maps the r2*r3 features to r1 values and fc2
    maps those to N outputs. No layer has a bias term.
    """

    def __init__(self, r1, r2, r3, p, N):
        super(NetRelu, self).__init__()
        self.r1, self.r2, self.r3 = r1, r2, r3
        self.p, self.N = p, N
        self.conv1 = nn.Conv2d(1, r2, kernel_size=(N, 1), bias=False)
        self.conv2 = nn.Conv2d(1, r3, kernel_size=(1, p), bias=False)
        self.fc1 = nn.Linear(in_features=r2*r3, out_features=r1, bias=False)
        self.fc2 = nn.Linear(in_features=r1, out_features=N, bias=False)

    def forward(self, x):
        """Map a batch of shape (batch, 1, N, p) to predictions of shape (batch, N)."""
        hidden = F.relu(self.conv1(x))
        # Run the shared conv2 kernel over each conv1 channel and stack the
        # results on the channel axis (feature index i2 * r3 + i3).
        per_channel = [
            self.conv2(hidden[:, c:(c+1), :, :]) for c in range(hidden.shape[1])
        ]
        features = torch.cat(per_channel, dim=1).view(-1, self.r2*self.r3)
        bottleneck = F.relu(self.fc1(F.relu(features)))
        return self.fc2(bottleneck)
    
class NetLinear(nn.Module):
    """Purely linear counterpart of NetRelu: conv1 -> conv2 -> fc1 -> fc2.

    The layers have the same sizes as in NetRelu (N x 1 kernel with r2
    channels, 1 x p kernel with r3 channels, r2*r3 -> r1 -> N linear maps,
    no biases) but no activation is applied anywhere.
    """

    def __init__(self, r1, r2, r3, p, N):
        super(NetLinear, self).__init__()
        self.r1, self.r2, self.r3 = r1, r2, r3
        self.p, self.N = p, N
        self.conv1 = nn.Conv2d(1, r2, kernel_size=(N, 1), bias=False)
        self.conv2 = nn.Conv2d(1, r3, kernel_size=(1, p), bias=False)
        self.fc1 = nn.Linear(in_features=r2*r3, out_features=r1, bias=False)
        self.fc2 = nn.Linear(in_features=r1, out_features=N, bias=False)

    def forward(self, x):
        """Map a batch of shape (batch, 1, N, p) to predictions of shape (batch, N)."""
        projected = self.conv1(x)
        # The shared conv2 kernel is applied to each conv1 channel in turn;
        # outputs are stacked on the channel axis (feature index i2 * r3 + i3).
        pieces = []
        for channel in range(projected.shape[1]):
            pieces.append(self.conv2(projected[:, channel:(channel+1), :, :]))
        flat = torch.cat(pieces, dim=1).view(-1, self.r2*self.r3)
        return self.fc2(self.fc1(flat))

In [7]:
### Generate linear time-series inputs and targeted outputs
# kronecker(A, B) returns the Kronecker product used below to build the large transition matrix A
def kronecker(A, B):
    """Return the Kronecker product of two 2-D tensors.

    For A of shape (a0, a1) and B of shape (b0, b1) the result has shape
    (a0*b0, a1*b1) and entry [i*b0 + j, k*b1 + l] equals A[i, k] * B[j, l].

    Implemented with broadcasting and ``reshape`` so that non-contiguous
    inputs (for example transposed tensors) are accepted; the previous
    ``view(-1)`` based version raised on them.
    """
    n_rows = A.size(0) * B.size(0)
    n_cols = A.size(1) * B.size(1)
    # blocks[i, j, k, l] = A[i, k] * B[j, l]
    blocks = A[:, None, :, None] * B[None, :, None, :]
    return blocks.reshape(n_rows, n_cols)

### Frobenius norm of the difference between the estimated and the true matrix
class L2LossFun(nn.Module):
    """Euclidean (Frobenius) distance between two tensors, returned as a Python float.

    Because the square root is taken with ``math.sqrt``, the result is a
    plain number and carries no autograd graph; it is a metric, not a
    trainable loss.
    """

    def __init__(self):
        super(L2LossFun, self).__init__()

    def forward(self, A_Est, A_True):
        squared_error = (A_Est - A_True).pow(2).sum()
        return math.sqrt(squared_error)

class genA_true:
    """Namespace for the generator of the true transition matrix."""

    @staticmethod
    def genA(r1, r2, r3, p, N):
        """Draw a random (N*p) x (N*p) transition matrix in companion form.

        The top N rows are ``U1 @ G1 @ kronecker(U3T, U2T)`` where U1 (N x r1),
        U2T (r2 x N) and U3T (r3 x p) have orthonormal columns/rows taken from
        the SVD of Gaussian matrices, and G1 (r1 x r2*r3) is a Gaussian matrix
        whose columns are rescaled to Euclidean norm 0.9. The remaining
        N*(p-1) rows are the first rows of the identity, which shift the
        stacked lags by one position.

        Declared as a static method so it can be called on the class (as the
        notebook does) as well as on an instance.

        Returns:
            torch.Tensor of shape (N*p, N*p).
        """

        def genU(x, y, r, trans):
            # x, y: size of the random Gaussian matrix that is decomposed
            # r: number of leading left singular vectors to keep
            # trans: return the kept vectors as rows (r x x) instead of columns
            URM = torch.randn(x, y)
            U, _s, _VT = svd(URM.numpy())
            basis = U[:, :r]
            if trans:
                basis = np.transpose(basis)
            # Pin the dtype to that of the draw so later torch.mm calls see
            # matching dtypes regardless of what the SVD routine returns.
            return torch.tensor(basis, dtype=URM.dtype)

        # Every column of G1 is normalised to L2 norm 1 and scaled by 0.9.
        # (A Frobenius-norm normalisation of the whole core would flatten G1
        # to a vector before normalising; it is not used here.)
        G1 = F.normalize(torch.randn(r1, r2*r3), p=2, dim=0)*0.9
        U1 = genU(N, N, r1, False)
        U2T = genU(N, N, r2, True)
        U3T = genU(p, p, r3, True)
        A_dim1 = torch.mm(torch.mm(U1, G1), kronecker(U3T, U2T))
        # Companion block: rows that copy lags 1..p-1 into positions 2..p.
        A_dim2 = torch.eye(N*p)[:N*(p-1), :]
        return torch.cat((A_dim1, A_dim2), dim=0)
    
def rearrangeG(K, n_r2=None, n_r3=None):
    """Reorder the columns of K from (r2-major, r3-minor) to (r3-major, r2-minor).

    Column ``n_r3 * i + j`` of the input (i in range(n_r2), j in
    range(n_r3)) becomes column ``n_r2 * j + i`` of the output.

    Args:
        K: 2-D tensor with ``n_r2 * n_r3`` columns.
        n_r2: size of the first (major) index of the input columns;
            defaults to the notebook-level ``r2``.
        n_r3: size of the second (minor) index of the input columns;
            defaults to the notebook-level ``r3``.

    Returns:
        Tensor of the same shape as K with permuted columns.

    Raises:
        ValueError: if K does not have exactly ``n_r2 * n_r3`` columns.
    """
    if n_r2 is None:
        n_r2 = r2
    if n_r3 is None:
        n_r3 = r3
    if K.size(1) != n_r2 * n_r3:
        raise ValueError(
            "K has {} columns, expected {}".format(K.size(1), n_r2 * n_r3)
        )
    # Same visiting order as the original nested loops: j outer, i inner.
    order = [n_r3 * i + j for j in range(n_r3) for i in range(n_r2)]
    return K[:, order]

def Param_Matrix(net):
    """Assemble the N x (N*p) matrix implied by a network's weights.

    Computes ``U1 @ G1 @ kronecker(U3T, U2T)`` with

    * U2T: conv1 kernels as an (r2, N) matrix,
    * U3T: conv2 kernels as an (r3, p) matrix,
    * G1:  fc1 weights with columns reordered by ``rearrangeG`` so that they
      line up with the rows of ``kronecker(U3T, U2T)``,
    * U1:  fc2 weights, shape (N, r1).

    The singleton axes of the convolution weights are removed by explicit
    indexing rather than ``torch.squeeze``: an unqualified squeeze would also
    drop the channel or kernel axis whenever r2, r3, N or p equals 1.
    """
    U2T = net.conv1.weight[:, 0, :, 0]  # (r2, 1, N, 1) -> (r2, N)
    U3T = net.conv2.weight[:, 0, 0, :]  # (r3, 1, 1, p) -> (r3, p)
    G1 = rearrangeG(net.fc1.weight)
    U1 = net.fc2.weight
    return torch.mm(torch.mm(U1, G1), kronecker(U3T, U2T))


In [8]:
### Simulated dataset generated recursively from the transition matrix A
# A burn-in period is needed in the linear setting: the first 500 draws are discarded
class RandomDataset(Dataset):
    """Simulated series: each target is the first N entries of ``A @ lags`` plus N(0, 1) noise.

    Sample ``t`` is the pair ``(X_t, y_t)`` where ``X_t`` has shape
    (1, N, p) and holds the p most recent observations (newest in column 0)
    and ``y_t`` has shape (N,). The next window is obtained by prepending
    ``y_t`` and dropping the oldest column.

    Args:
        p: number of lags kept in each input window.
        N: dimension of each observation.
        Smp_size: number of samples kept after the burn-in.
        A: transition matrix with N*p columns, applied to the lags stacked
            lag by lag (newest first).
        burn_in: number of initial draws to discard (default 500, the value
            that was previously hard-coded).
    """

    def __init__(self, p, N, Smp_size, A, burn_in=500):
        self.X = []
        self.y = []
        # Arbitrary large-scale starting window; its influence is removed by
        # the burn-in.
        window = torch.randn(1, 1, N, p)*200
        for step in range(Smp_size + burn_in):
            if step > 0:
                newest = self.y[step-1].view(1, 1, N, 1)
                # Keep only the p newest columns so the window stays bounded
                # instead of growing by one column per step.
                window = torch.cat([newest, window], dim=3)[:, :, :, :p]
            self.X.append(window[0])
            # (1, 1, N, p) -> (1, 1, p, N) -> vector with lag-major ordering.
            # reshape (not view) because the permuted tensor is non-contiguous.
            stacked_lags = window.permute(0, 1, 3, 2).reshape(N*p)
            self.y.append(torch.mv(A, stacked_lags)[:N] + torch.randn(N))
        self.X = self.X[burn_in:]
        self.y = self.y[burn_in:]

    def __getitem__(self, idx):
        return self.X[idx], self.y[idx]

    def __len__(self):
        return len(self.X)

In [9]:
# input to GPU
#device = torch.device("cuda:2")
#device2 = torch.device('cpu')

In [7]:
# Training runs on a single full batch: the batch size equals the sample size.
batch_size = 500
Smp_size = 500

# Per-replication results, one list per quantity. Keys of the error lists
# carry the sample size as a suffix.
names = {
    "y_true": [],
    "predErrorDW" + str(Smp_size): [],
    "y_predDW": [],
    "predErrorN" + str(Smp_size): [],
    "y_predN": [],
    "predErrorL" + str(Smp_size): [],
    "y_predL": [],
}

In [8]:
# Monte-Carlo comparison of NetDW, NetRelu and NetLinear on one-step-ahead
# forecasts of a series simulated from a fixed transition matrix A_True.
distance = L2LossFun()
criterion = nn.MSELoss()

SEED = 0                # all random draws below come from torch's global RNG
N_REPLICATIONS = 200    # number of simulated data sets
LEARNING_RATE = 0.01
MOMENTUM = 0.9
LOSS_TOL = 0.00000001   # stop once the loss changes by no more than this between epochs
MAX_EPOCHS = 100000     # safety cap so a non-converging run cannot loop forever


def _train_until_converged(net, loader):
    """Fit ``net`` with SGD on ``loader`` until the epoch loss stabilises.

    The loss of the last batch of an epoch is compared with that of the
    previous epoch; training stops when the absolute change is at most
    LOSS_TOL, when the loss is no longer finite, or after MAX_EPOCHS epochs.
    """
    optimizer = optim.SGD(net.parameters(), lr=LEARNING_RATE, momentum=MOMENTUM)
    previous_loss = None
    for _epoch in range(MAX_EPOCHS):
        epoch_loss = None
        for _x, _y in loader:
            # -1 lets the batch dimension follow the loader instead of
            # assuming that one batch holds the whole sample.
            _x = _x.view(-1, 1, N, p).float()
            _y = _y.float()
            loss = criterion(net(_x), _y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            epoch_loss = loss.item()
        if epoch_loss is None or not math.isfinite(epoch_loss):
            # Empty loader or diverged optimisation: nothing more to gain.
            break
        if previous_loss is not None and abs(previous_loss - epoch_loss) <= LOSS_TOL:
            break
        previous_loss = epoch_loss
    return net


def _forecast_error(net, X_F, y_F):
    """Return the detached forecast and its L2 error minus sqrt(N).

    sqrt(N) is subtracted from the distance exactly as in the reported
    PredError values.
    """
    with torch.no_grad():
        y_pred = net(X_F)
    return y_pred, distance(y_pred, y_F) - np.sqrt(N)


torch.manual_seed(SEED)

t1_start = perf_counter()

# Redraw the transition matrix until its spectral radius is at most 1.
specRadius = float("inf")
while specRadius > 1:
    A_True = genA_true.genA(r1, r2, r3, p, N)
    specRadius = np.abs(LA.eigvals(A_True.numpy())).max()

for rep in range(N_REPLICATIONS):
    dataset = RandomDataset(p=p, N=N, Smp_size=Smp_size, A=A_True)

    # One-step-ahead target: shift the last window by its own response and
    # draw the next observation from the true model.
    X_last, y_last = dataset[Smp_size-1]
    X_F = torch.cat([y_last.view(1, 1, N, 1), X_last.view(1, 1, N, p)], dim=3)[:, :, :N, :p].float()
    y_F = (torch.mv(A_True, X_F.permute(0, 1, 3, 2).reshape(N*p))[:N] + torch.randn(N)).float()

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    ### NetDW
    netDW = _train_until_converged(NetDW(r1, r2, r3, p, N), loader)
    y_predDW, predErrorDW = _forecast_error(netDW, X_F, y_F)
    names["y_true"].append(y_F)
    print("PredError is {}.".format(predErrorDW))
    names["predErrorDW" + str(Smp_size)].append(predErrorDW)
    names["y_predDW"].append(y_predDW)

    ### NetRelu
    netN = _train_until_converged(NetRelu(r1, r2, r3, p, N), loader)
    y_predN, predErrorN = _forecast_error(netN, X_F, y_F)
    print("PredError for N is {}.".format(predErrorN))
    names["predErrorN" + str(Smp_size)].append(predErrorN)
    names["y_predN"].append(y_predN)

    ### NetLinear
    netLinear = _train_until_converged(NetLinear(r1, r2, r3, p, N), loader)
    y_predL, predErrorL = _forecast_error(netLinear, X_F, y_F)
    print("PredError for L is {}.".format(predErrorL))
    names["predErrorL" + str(Smp_size)].append(predErrorL)
    names["y_predL"].append(y_predL)
    print(rep)

t1_stop = perf_counter()
print("Elapsed time during the whole program in seconds:",
                                        t1_stop-t1_start)
sim500_r2P3N25L = names
# NOTE: despite the ".py" suffix this file is a torch.save pickle, not Python
# source; the name is kept so existing readers of the file keep working.
torch.save(sim500_r2P3N25L, "sim500_r2P3N25L.py")

  if sys.path[0] == '':


PredError is 0.4003648422757049.




PredError for N is 0.4003648422757049.




PredError for L is -0.15076250387514634.
0
PredError is 0.4634080354720487.
PredError for N is 0.3169544170004821.
PredError for L is -0.044514252945750954.
1
PredError is 1.3327249268209416.
PredError for N is 1.2964578873752686.
PredError for L is 0.9059868002077787.
2
PredError is 0.3865611805045299.
PredError for N is 0.39630853433432733.
PredError for L is 0.068943209087867.
3
PredError is -0.4346529293905741.
PredError for N is -0.4346529293905741.
PredError for L is -0.6070658284028054.
4
PredError is -1.041293138426496.
PredError for N is -1.1154603558767562.
PredError for L is -0.9933184161387212.
5
PredError is 1.1484086497695118.
PredError for N is 1.0298460279958963.
PredError for L is 0.7136247525104409.
6
PredError is -0.14149684079571223.
PredError for N is -0.14149684079571223.
PredError for L is -0.48630086922714266.
7
PredError is 1.1850988179410127.
PredError for N is 0.8037036251899501.
PredError for L is 0.8412884918583838.
8
PredError is 0.3705912406976388.
PredEr

PredError for N is 0.7328387705091046.
PredError for L is 0.7974826874665704.
71
PredError is -0.3129441535293642.
PredError for N is -0.3129441535293642.
PredError for L is -0.35985097532510224.
72
PredError is 0.9188156881584533.
PredError for N is 0.8869857271908206.
PredError for L is 0.5486737840423412.
73
PredError is 1.1609714430692222.
PredError for N is 1.1796211403082095.
PredError for L is 0.562875627843435.
74
PredError is -0.1149468031844556.
PredError for N is -0.1301357227101665.
PredError for L is -0.1492104769860365.
75
PredError is 0.2266677823033083.
PredError for N is 0.2530063922116268.
PredError for L is 0.12719884083806665.
76
PredError is 0.8780856855701158.
PredError for N is 0.8780856855701158.
PredError for L is 0.4329276317633335.
77
PredError is -1.0186482249382798.
PredError for N is -1.7442517863063736.
PredError for L is -1.641393563116396.
78
PredError is -0.8499584246040799.
PredError for N is -0.8119630161522959.
PredError for L is -0.7820342616264915

PredError for L is 0.5267943631563847.
141
PredError is 0.8731644380484891.
PredError for N is 0.8731644380484891.
PredError for L is 0.2673221196364777.
142
PredError is 0.9789577895130934.
PredError for N is 0.7602639847767749.
PredError for L is 0.4794771326484062.
143
PredError is 0.7947164871337549.
PredError for N is 0.8309276888332571.
PredError for L is 0.21232199971012022.
144
PredError is 0.06932041707434333.
PredError for N is 0.06932041707434333.
PredError for L is 0.03355250776116048.
145
PredError is 0.3999021450642539.
PredError for N is 0.3999021450642539.
PredError for L is -0.08308083703059221.
146
PredError is 0.1395709687718094.
PredError for N is 0.1395709687718094.
PredError for L is -0.1943084525336367.
147
PredError is -0.288417980449335.
PredError for N is 0.1321348442624135.
PredError for L is -0.4577339171287491.
148
PredError is 1.8222779263774012.
PredError for N is -0.1469302345761312.
PredError for L is -0.5828958644036764.
149
PredError is 0.523037260627

In [9]:
from statistics import mean, median
# Average prediction error over all replications, printed in the order
# NetDW, NetRelu, NetLinear.
for error_key in ("predErrorDW", "predErrorN", "predErrorL"):
    print(mean(names[error_key + str(Smp_size)]))

0.19125785402605738
0.19420669428091727
-0.09204430506553138
