In [1]:
import torch
import torch.nn as nn
import torch.nn.functional as F
from scipy.linalg import svd
import numpy as np    
from torch.utils.data import Dataset, DataLoader
from time import perf_counter 
from torch.autograd import Variable
import torch.optim as optim
import math
from numpy import linalg as LA

In [2]:
# Problem dimensions shared by the model, data-generation and simulation cells.
r1, r2, r3 = 1, 2, 2  # r1: width of the fc1 output; r2 / r3: channel counts of conv1 / conv2
p = 3                 # number of columns (lags) in each N x p input window
N = 25                # number of rows of each input window and length of each target vector

In [3]:
### Construct the deep networks (two-pathway, ReLU, and linear variants)

class NetDW(nn.Module):
    """Two-pathway ReLU network whose output is the average of both pathways.

    Both pathways share the same four bias-free layers:

    * ``conv1`` -- ``r2`` kernels of size ``(N, 1)`` (collapses the row axis),
    * ``conv2`` -- ``r3`` kernels of size ``(1, p)`` (collapses the column axis),
    * ``fc1``   -- linear map from ``r2 * r3`` features to ``r1``,
    * ``fc2``   -- linear map from ``r1`` to ``N`` outputs.

    Pathway 1 applies ``conv1`` then ``conv2``; pathway 2 applies ``conv2``
    then ``conv1``.  Each pathway yields ``r2 * r3`` features that go through
    ``fc1 -> ReLU -> fc2``; the two results are averaged.

    Args:
        r1, r2, r3: layer widths as described above.
        p: number of columns of the input window.
        N: number of rows of the input window (and output length).
    """

    def __init__(self, r1, r2, r3, p, N):
        # Initialise nn.Module before touching any attribute.
        super(NetDW, self).__init__()
        self.r1 = r1
        self.r2 = r2
        self.r3 = r3
        self.p = p
        self.N = N
        self.conv1 = nn.Conv2d(1, r2, kernel_size=(N, 1), bias=False)
        self.conv2 = nn.Conv2d(1, r3, kernel_size=(1, p), bias=False)
        # Linear maps without bias: y = Wx
        self.fc1 = nn.Linear(in_features=r2*r3, out_features=r1, bias=False)
        self.fc2 = nn.Linear(in_features=r1, out_features=N, bias=False)

    def forward(self, x):
        """Return the averaged prediction of both pathways.

        Args:
            x: input batch; the shape comments below assume ``(B, 1, N, p)``.

        Returns:
            Tensor of shape ``(B, N)`` for an input of shape ``(B, 1, N, p)``.
        """
        y1 = F.relu(self.conv1(x))  # (B, r2, 1, p)
        y2 = F.relu(self.conv2(x))  # (B, r3, N, 1)

        # Pathway 1 (first N, then p): apply conv2 to every conv1 channel
        # separately and stack the results along the channel axis, giving
        # (B, r2*r3, 1, 1) with feature index i*r3 + j.
        z1 = torch.cat(
            [self.conv2(y1[:, i:(i+1), :, :]) for i in range(y1.shape[1])],
            dim=1,
        )
        # Flatten exactly once, AFTER all channels are gathered.  Doing this
        # inside the loop only worked when the loop ran exactly one time.
        z1 = F.relu(z1.view(-1, self.r2*self.r3))  # -1 infers the batch size

        # Pathway 2 (first p, then N): apply conv1 to every conv2 channel and
        # stack along dim 2, giving (B, r2, r3, 1) -- the same i*r3 + j feature
        # order as pathway 1 once flattened.
        z2 = torch.cat(
            [self.conv1(y2[:, j:(j+1), :, :]) for j in range(y2.shape[1])],
            dim=2,
        )
        z2 = F.relu(z2.view(-1, self.r2*self.r3))

        x1 = self.fc2(F.relu(self.fc1(z1)))
        x2 = self.fc2(F.relu(self.fc1(z2)))
        # Average the two pathway predictions.
        return torch.mean(torch.stack([x1, x2]), dim=0)

class NetRelu(nn.Module):
    """Single-pathway network: conv1 -> ReLU -> conv2 -> ReLU -> fc1 -> ReLU -> fc2.

    All layers are bias-free.

    Args:
        r1: number of outputs of ``fc1``.
        r2: number of ``conv1`` kernels, each of size ``(N, 1)``.
        r3: number of ``conv2`` kernels, each of size ``(1, p)``; the same
            kernels are applied to every ``conv1`` channel.
        p: number of columns of the input window.
        N: number of rows of the input window and number of outputs of ``fc2``.
    """

    def __init__(self, r1, r2, r3, p, N):
        super(NetRelu, self).__init__()
        self.r1, self.r2, self.r3 = r1, r2, r3
        self.p, self.N = p, N
        # conv1 collapses the row axis, conv2 collapses the column axis.
        self.conv1 = nn.Conv2d(1, r2, kernel_size=(N, 1), bias=False)
        self.conv2 = nn.Conv2d(1, r3, kernel_size=(1, p), bias=False)
        # Bias-free linear maps: r2*r3 features -> r1 -> N outputs.
        self.fc1 = nn.Linear(in_features=r2*r3, out_features=r1, bias=False)
        self.fc2 = nn.Linear(in_features=r1, out_features=N, bias=False)

    def forward(self, x):
        """Map an input batch (assumed ``(B, 1, N, p)``) to predictions ``(B, N)``."""
        hidden = F.relu(self.conv1(x))
        # Apply the shared conv2 kernels to each conv1 channel on its own and
        # concatenate the responses along the channel axis.
        per_channel = [
            self.conv2(hidden[:, c:(c+1), :, :]) for c in range(hidden.shape[1])
        ]
        features = torch.cat(per_channel, dim=1)
        # Flatten to (batch, r2*r3); -1 lets view infer the batch size.
        features = F.relu(features.view(-1, self.r2*self.r3))
        core = F.relu(self.fc1(features))
        return self.fc2(core)
    
class NetLinear(nn.Module):
    """Purely linear counterpart of ``NetRelu``: conv1 -> conv2 -> fc1 -> fc2.

    No activation is applied anywhere and no layer has a bias.

    Args:
        r1: number of outputs of ``fc1``.
        r2: number of ``conv1`` kernels, each of size ``(N, 1)``.
        r3: number of ``conv2`` kernels, each of size ``(1, p)``; the same
            kernels are applied to every ``conv1`` channel.
        p: number of columns of the input window.
        N: number of rows of the input window and number of outputs of ``fc2``.
    """

    def __init__(self, r1, r2, r3, p, N):
        super(NetLinear, self).__init__()
        self.r1, self.r2, self.r3 = r1, r2, r3
        self.p, self.N = p, N
        # conv1 collapses the row axis, conv2 collapses the column axis.
        self.conv1 = nn.Conv2d(1, r2, kernel_size=(N, 1), bias=False)
        self.conv2 = nn.Conv2d(1, r3, kernel_size=(1, p), bias=False)
        # Bias-free linear maps: r2*r3 features -> r1 -> N outputs.
        self.fc1 = nn.Linear(in_features=r2*r3, out_features=r1, bias=False)
        self.fc2 = nn.Linear(in_features=r1, out_features=N, bias=False)

    def forward(self, x):
        """Map an input batch (assumed ``(B, 1, N, p)``) to predictions ``(B, N)``."""
        hidden = self.conv1(x)
        # Apply the shared conv2 kernels to each conv1 channel on its own and
        # concatenate the responses along the channel axis.
        per_channel = [
            self.conv2(hidden[:, c:(c+1), :, :]) for c in range(hidden.shape[1])
        ]
        features = torch.cat(per_channel, dim=1)
        # Flatten to (batch, r2*r3); -1 lets view infer the batch size.
        features = features.view(-1, self.r2*self.r3)
        return self.fc2(self.fc1(features))

In [4]:
### Generate linear time-series inputs and targeted outputs
# Helpers used to build the large transition matrix A
def kronecker(A, B):
    """Return the Kronecker product of two 2-D tensors.

    For ``A`` of shape ``(a0, a1)`` and ``B`` of shape ``(b0, b1)`` the result
    has shape ``(a0*b0, a1*b1)`` and entry ``[i*b0 + k, j*b1 + l]`` equals
    ``A[i, j] * B[k, l]``.

    The product is formed by broadcasting, so it does not require the inputs
    to be contiguous (e.g. transposed factors are accepted).

    Args:
        A: 2-D tensor.
        B: 2-D tensor.

    Returns:
        2-D tensor holding the Kronecker product of ``A`` and ``B``.

    Raises:
        ValueError: if either argument is not 2-D.
    """
    rows_a, cols_a = A.shape
    rows_b, cols_b = B.shape
    # (a0, 1, a1, 1) * (1, b0, 1, b1) -> (a0, b0, a1, b1)
    blocks = A[:, None, :, None] * B[None, :, None, :]
    return blocks.reshape(rows_a * rows_b, cols_a * cols_b)

### Frobenius norm of the difference between an estimate and the true A
class L2LossFun(nn.Module):
    """Euclidean (Frobenius) distance between two tensors.

    The result is returned as a plain Python ``float`` (via ``math.sqrt``),
    so it carries no autograd history.
    """

    def __init__(self):
        super(L2LossFun, self).__init__()

    def forward(self, A_Est, A_True):
        """Return ``sqrt(sum((A_Est - A_True) ** 2))`` as a Python float."""
        residual = A_Est - A_True
        squared_total = torch.sum(residual ** 2)
        return math.sqrt(squared_total)

class genA_true:
    """Namespace for the generator of the true transition matrix."""

    @staticmethod
    def genA(r1, r2, r3, p, N):
        """Draw a random ``(N*p, N*p)`` transition matrix in companion form.

        The first ``N`` rows are ``U1 @ G1 @ kron(U3^T, U2^T)``, where the
        ``U`` factors are leading left-singular vectors of random Gaussian
        matrices and ``G1`` is a random ``(r1, r2*r3)`` core.  The remaining
        ``N*(p-1)`` rows are the first ``N*(p-1)`` rows of the identity.

        Declared as a ``staticmethod`` so it can be called both on the class
        (``genA_true.genA(...)``) and on an instance.

        Args:
            r1: number of columns kept in ``U1`` / rows of the core ``G1``.
            r2: number of left-singular vectors kept for the ``N x N`` factor.
            r3: number of left-singular vectors kept for the ``p x p`` factor.
            p: number of lags.
            N: dimension of the series.

        Returns:
            Tensor of shape ``(N*p, N*p)``.
        """

        def genU(x, y, r, trans):
            """Return the first ``r`` left-singular vectors of a random x-by-y matrix.

            If ``trans`` is truthy the result is transposed to shape ``(r, x)``.
            """
            URM = torch.randn(x, y)
            # scipy works on numpy arrays; make the conversion explicit.
            U, _, _ = svd(URM.numpy())
            basis = torch.tensor(U[:, :r])
            if trans:
                # Materialise the transpose so downstream code sees a
                # contiguous tensor.
                return basis.t().contiguous()
            return basis

        # Each column of the random core is scaled to Euclidean norm 0.9
        # (normalisation runs along dim=0, i.e. over the r1 rows).
        # NOTE(review): the previous comment spoke of fixing the l2 operator
        # norm of G at 0.95; the code has always used a per-column 0.9 --
        # confirm which was intended.
        G1 = F.normalize(torch.randn(r1, r2*r3), p=2, dim=0)*0.9
        U1 = genU(N, N, r1, False)
        U2T = genU(N, N, r2, True)
        U3T = genU(p, p, r3, True)
        A_dim1 = torch.mm(torch.mm(U1, G1), kronecker(U3T, U2T))
        # Identity rows that shift the lagged blocks down by one position.
        A_dim2 = torch.eye(N*p)[:N*(p-1), :]
        return torch.cat((A_dim1, A_dim2), dim=0)
    
def rearrangeG(K, n_r2=None, n_r3=None):
    """Reorder the columns of ``K`` from ``i*r3 + j`` order to ``j*r2 + i`` order.

    Column ``j*n_r2 + i`` of the result is column ``n_r3*i + j`` of ``K``
    (``i`` in ``range(n_r2)``, ``j`` in ``range(n_r3)``).

    Args:
        K: 2-D tensor with at least ``n_r2 * n_r3`` columns.
        n_r2: size of the ``i`` index; defaults to the module-level ``r2``.
        n_r3: size of the ``j`` index; defaults to the module-level ``r3``.

    Returns:
        Tensor with the same number of rows as ``K`` and ``n_r2 * n_r3``
        reordered columns.
    """
    # Fall back to the notebook-level ranks to stay compatible with existing
    # single-argument calls.
    if n_r2 is None:
        n_r2 = r2
    if n_r3 is None:
        n_r3 = r3
    column_order = [n_r3 * i + j for j in range(n_r3) for i in range(n_r2)]
    return K[:, column_order]

def Param_Matrix(net):
    """Assemble the matrix ``fc2.W @ G @ kron(conv2.W, conv1.W)`` from a network.

    Args:
        net: a module exposing ``conv1``, ``conv2``, ``fc1`` and ``fc2`` layers
            (as ``NetDW``, ``NetRelu`` and ``NetLinear`` do).

    Returns:
        2-D tensor; the product of the ``fc2`` weight, the column-reordered
        ``fc1`` weight, and the Kronecker product of the flattened conv
        kernels.
    """
    conv1_w = net.conv1.weight
    conv2_w = net.conv2.weight
    # Flatten each kernel to one row but always keep the output-channel axis.
    # torch.squeeze would also drop that axis when a layer has a single
    # channel, leaving a 1-D tensor that kronecker cannot handle.
    U2T = conv1_w.reshape(conv1_w.shape[0], -1)
    U3T = conv2_w.reshape(conv2_w.shape[0], -1)
    # NOTE(review): rearrangeG is called with one argument, so it uses the
    # notebook-level r2 / r3 rather than net.r2 / net.r3 -- these must agree.
    G1 = rearrangeG(net.fc1.weight)
    U1 = net.fc2.weight
    return torch.mm(torch.mm(U1, G1), kronecker(U3T, U2T))


In [5]:
### Dataset that simulates the time series from the transition matrix A
# A burn-in period is needed for the linear setting
class RandomDataset(Dataset):
    """Simulated series ``y_t = (A @ vec(lags_t))[:N] + noise`` as (window, target) pairs.

    The series starts from a random ``N x p`` window; at every step the newest
    observation is prepended as column 0 and the oldest column is dropped.
    The first ``burn_in`` pairs are discarded.

    Args:
        p: number of lags (columns of each window).
        N: dimension of the series (rows of each window, length of each target).
        Smp_size: number of (window, target) pairs kept after burn-in.
        A: transition matrix of shape ``(N*p, N*p)``; only the first ``N`` rows
            of ``A @ vec(window)`` are used.
        burn_in: number of initial steps to discard (default 500).
        init_scale: multiplier applied to the standard-normal initial window
            (default 200).

    Attributes:
        X: list of ``Smp_size`` tensors of shape ``(1, N, p)``.
        y: list of ``Smp_size`` tensors of shape ``(N,)``.
    """

    def __init__(self, p, N, Smp_size, A, burn_in=500, init_scale=200):
        self.X = []
        self.y = []
        window = None
        for step in range(Smp_size + burn_in):
            if step == 0:
                window = torch.randn(1, 1, N, p) * init_scale
            else:
                # Prepend the latest observation and keep only p columns, so
                # the buffer stays (1, 1, N, p) instead of growing each step.
                newest = self.y[-1].view(1, 1, N, 1)
                window = torch.cat([newest, window[:, :, :, :p - 1]], dim=3)
            self.X.append(window.squeeze(0))
            # Stack the columns (lags) into one vector, most recent lag first.
            # reshape copies when needed, so this neither depends on memory
            # layout nor triggers the torch.tensor(tensor) copy warning.
            stacked = window.permute(0, 1, 3, 2).reshape(N * p)
            signal = torch.mv(A, stacked)[:N]
            self.y.append(signal + torch.randn(N))
        # Drop the burn-in period.
        self.X = self.X[burn_in:]
        self.y = self.y[burn_in:]

    def __getitem__(self, idx):
        """Return the ``(window, target)`` pair at position ``idx``."""
        return self.X[idx], self.y[idx]

    def __len__(self):
        """Return the number of retained pairs."""
        return len(self.X)

In [6]:
# input to GPU
#device = torch.device("cuda:2")
#device2 = torch.device('cpu')

In [7]:
# One batch covers the whole training sample.
batch_size = 500
Smp_size = 500

# Per-replication results, keyed by quantity; every entry starts as an empty
# list that the simulation loop appends to.  The prediction-error keys carry
# the sample size as a suffix.
_size_tag = str(Smp_size)
names = {
    "y_true": [],
    "predErrorDW" + _size_tag: [],
    "y_predDW": [],
    "predErrorN" + _size_tag: [],
    "y_predN": [],
    "predErrorL" + _size_tag: [],
    "y_predL": [],
}

In [8]:
distance = L2LossFun()
criterion = nn.MSELoss()


def fit_until_converged(net, loader, loss_fn, lr=0.01, momentum=0.9,
                        tol=0.00000001, max_epochs=None):
    """Train ``net`` with SGD until the loss stops changing between epochs.

    After every pass over ``loader`` the loss of the last batch is compared
    with the value recorded after the previous pass (the very first comparison
    is made against the initial placeholder 1000).  Training stops once the
    absolute change is at most ``tol``.

    Args:
        net: model to train in place.
        loader: iterable of ``(inputs, targets)`` batches.
        loss_fn: callable ``loss_fn(prediction, target)`` returning a scalar tensor.
        lr: SGD learning rate.
        momentum: SGD momentum.
        tol: convergence tolerance on the change in loss.
        max_epochs: optional cap on the number of passes; ``None`` (default)
            means no cap.

    Returns:
        Number of passes over ``loader`` that were run.
    """
    optimizer = optim.SGD(net.parameters(), lr=lr, momentum=momentum)
    loss_last = 1000
    loss_new = 0
    epoch = 0
    while abs(loss_last - loss_new) > tol:
        if max_epochs is not None and epoch >= max_epochs:
            break
        if epoch > 0:
            loss_last = loss_new
        for batch_x, batch_y in loader:
            # -1 infers the batch size, so a batch smaller than Smp_size
            # is handled as well.
            batch_x = batch_x.view(-1, 1, N, p).float()
            batch_y = batch_y.float()
            loss = loss_fn(net(batch_x).float(), batch_y)
            optimizer.zero_grad()
            loss.backward()
            optimizer.step()
            loss_new = loss.item()
        epoch += 1
    return epoch


def one_step_forecast_error(net, X_next, y_next, metric):
    """Return ``(prediction, metric(prediction, y_next) - sqrt(N))``.

    The forward pass runs under ``torch.no_grad()`` so the stored prediction
    does not keep the autograd graph (and the model) alive.
    """
    with torch.no_grad():
        y_pred = net(X_next).float()
    return y_pred, metric(y_pred, y_next) - np.sqrt(N)


t1_start = perf_counter()

# Redraw the transition matrix until its spectral radius is at most 1.
specRadius = 1000
while specRadius > 1:
    A_True = genA_true.genA(r1, r2, r3, p, N)
    eigenvalues, _ = LA.eig(A_True.numpy())
    specRadius = max(abs(eigenvalues))

n_replications = 200
for rep in range(n_replications):
    dataset = RandomDataset(p=p, N=N, Smp_size=Smp_size, A=A_True)

    ### Generate the one-step-ahead forecast target
    # Window for the next step: the last target becomes column 0 and the
    # oldest lag is dropped.
    X_t, y_t = dataset[Smp_size-1]
    X_F = torch.cat([y_t.view(1, 1, N, 1), X_t.view(1, 1, N, p)], dim=3)[:, :, :N, :p]
    y_F = torch.mv(A_True, X_F.permute(0, 1, 3, 2).reshape(N*p))[:N] + torch.randn(N)
    X_F = X_F.float()
    y_F = y_F.float()
    names["y_true"].append(y_F)

    loader = DataLoader(dataset, batch_size=batch_size, shuffle=False)

    # The error is reported relative to sqrt(N) -- presumably the approximate
    # norm of the N-dimensional unit-variance noise; TODO confirm.

    ### NetDW
    netDW = NetDW(r1, r2, r3, p, N)
    fit_until_converged(netDW, loader, criterion)
    y_predDW, predErrorDW = one_step_forecast_error(netDW, X_F, y_F, distance)
    print("PredError is {}.".format(predErrorDW))
    names["predErrorDW" + str(Smp_size)].append(predErrorDW)
    names["y_predDW"].append(y_predDW)

    ### NetN
    netN = NetRelu(r1, r2, r3, p, N)
    fit_until_converged(netN, loader, criterion)
    y_predN, predErrorN = one_step_forecast_error(netN, X_F, y_F, distance)
    print("PredError for N is {}.".format(predErrorN))
    names["predErrorN" + str(Smp_size)].append(predErrorN)
    names["y_predN"].append(y_predN)

    ### FC
    netLinear = NetLinear(r1, r2, r3, p, N)
    fit_until_converged(netLinear, loader, criterion)
    y_predL, predErrorL = one_step_forecast_error(netLinear, X_F, y_F, distance)
    print("PredError for L is {}.".format(predErrorL))
    names["predErrorL" + str(Smp_size)].append(predErrorL)
    names["y_predL"].append(y_predL)
    print(rep)

t1_stop = perf_counter()
print("Elapsed time during the whole program in seconds:",
                                        t1_stop-t1_start)
sim500_r2P3N25L_N = names
# NOTE: despite the ".py" suffix this file is a torch-serialised dict, not
# Python source; the name is kept so existing loaders keep working.
torch.save(sim500_r2P3N25L_N, "sim500_r2P3N25L_N.py")

  if sys.path[0] == '':


PredError is 0.3463285593892751.
PredError for N is 1.5489422941509527.




PredError for L is 0.38667254189526545.
0
PredError is -0.28186200807565953.
PredError for N is -0.28186200807565953.
PredError for L is -0.23692655236111904.
1
PredError is 0.5919940259516814.
PredError for N is 0.5153875975267725.
PredError for L is 0.48233298517402634.
2
PredError is -0.8216639341655707.
PredError for N is -0.8216639341655707.
PredError for L is -0.8608813452722766.
3
PredError is -0.17597518260500955.
PredError for N is -0.01569101024558517.
PredError for L is -0.1769884639263397.
4
PredError is 0.5913941947548018.
PredError for N is 0.5350885380363488.
PredError for L is 0.6885683502519582.
5
PredError is -1.3995588149096334.
PredError for N is -1.4065116645871734.
PredError for L is -1.0286366814056955.
6
PredError is -0.900135209987198.
PredError for N is -0.9005357859787315.
PredError for L is -0.8877513185436303.
7
PredError is -1.3296289805095545.
PredError for N is -1.3296289805095545.
PredError for L is -1.2852005344467243.
8
PredError is -1.396841539338138

PredError for N is -0.35515665472467894.
PredError for L is -0.3701601350270254.
71
PredError is 0.6116661556837713.
PredError for N is 0.6281598858384898.
PredError for L is -0.9652878598550547.
72
PredError is 0.008024452235090074.
PredError for N is 0.10930332518534236.
PredError for L is -0.08389785472938183.
73
PredError is -1.004466046625207.
PredError for N is 0.7752865939015718.
PredError for L is -0.9987778146620716.
74
PredError is 0.9343018602119688.
PredError for N is 0.9343018602119688.
PredError for L is 1.07727703857684.
75
PredError is 1.457644157144518.
PredError for N is 1.4032448768099268.
PredError for L is 1.0605808641892533.
76
PredError is 0.8165564379954846.
PredError for N is 0.8165564379954846.
PredError for L is -0.04655712678365731.
77
PredError is -0.5658334707130992.
PredError for N is -0.5658334707130992.
PredError for L is -0.5544266333133416.
78
PredError is -1.015268977494678.
PredError for N is -1.0060975535180932.
PredError for L is -1.10643722959830

PredError for L is -1.1421249771704032.
141
PredError is 0.3754616472580823.
PredError for N is 0.3754616472580823.
PredError for L is -0.09373989227752944.
142
PredError is 1.452632266392003.
PredError for N is 1.0943947475419726.
PredError for L is 0.9727977189548902.
143
PredError is -1.2961152288757534.
PredError for N is -1.2961152288757534.
PredError for L is -1.0261246742795116.
144
PredError is -0.9992244995373039.
PredError for N is -0.9881516935324628.
PredError for L is -0.9923940492429937.
145
PredError is 1.0461302306004674.
PredError for N is 1.0461302306004674.
PredError for L is 1.0000521339694677.
146
PredError is 2.1098589721943277.
PredError for N is 0.6984656962645577.
PredError for L is 0.5440123142254292.
147
PredError is -1.4673305427392878.
PredError for N is -1.4673305427392878.
PredError for L is -1.2867284786545232.
148
PredError is -0.7687650830130579.
PredError for N is -0.6491658188458374.
PredError for L is -0.6788604589780487.
149
PredError is -0.9645778

In [9]:
from statistics import mean, median

# Mean one-step forecast error of each model (DW, ReLU, linear) over all
# replications, printed in that order.
for model_tag in ("DW", "N", "L"):
    print(mean(names["predError" + model_tag + str(Smp_size)]))

0.0813409819447413
0.10849765076146593
-0.1785901924459024
