In [1]:
import torch
import numpy as np
# Report whether a CUDA device is usable by this PyTorch installation.
torch.cuda.is_available()

True

## Build the model
### Random initialization with white-noise data, to see whether the model converges to an orthogonal basis

#### Define dataset

In [2]:
# Operate on the GPU when one is available; otherwise fall back to the CPU so
# the notebook still runs end to end on machines without CUDA.
dev = torch.device("cuda" if torch.cuda.is_available() else "cpu")

In [3]:
# generate data (white noise)
# Seed the global RNG so that the data, and the random parameter
# initializations drawn after it in a top-to-bottom run, are reproducible.
torch.manual_seed(0)
N = 512
n_sample = 100
batch_size = 100
# N x n_sample matrix of samples drawn uniformly from [-1, 1)
x = torch.rand(N, n_sample, device = dev)*2 - 1

In [4]:
# Confirm the dtype of the generated data.
x.dtype

torch.float32

In [29]:
# Length of the first dimension of x
x.shape[0]

512

In [5]:
# M sizes the frequency vectors w1/w2 and one dimension of each weight matrix.
M = 512

#### Initialize parameters

In [6]:
# Four trainable scalars drawn from U[0, 1): a1/a2 are used when filling W1
# and a3/a4 when filling W2_1 and W2_2.
a1, a2, a3, a4 = [
    torch.rand(1, device = dev, requires_grad = True) for _ in range(4)
]
print(a1, a2, a3, a4)
print(a1.dtype)

tensor([0.5454], device='cuda:0', requires_grad=True) tensor([0.0388], device='cuda:0', requires_grad=True) tensor([0.7586], device='cuda:0', requires_grad=True) tensor([0.0649], device='cuda:0', requires_grad=True)
torch.float32


In [7]:
# Frequencies drawn uniformly from [0, pi). Scale first and only then mark the
# result as requiring grad, so w1/w2 are leaf tensors: backward() populates
# their .grad without retain_grad(), and the in-place update acts on the
# actual parameter rather than on a tensor derived from a hidden leaf.
w1 = (torch.rand(M, device = dev) * torch.pi).requires_grad_()
w2 = (torch.rand(M, device = dev) * torch.pi).requires_grad_()
print(w1)
print(w2.dtype)

tensor([0.5580, 1.1050, 2.0936, 2.1583, 1.2282, 2.9905, 0.5601, 2.5849, 1.3103,
        1.7071, 0.9474, 0.4678, 2.0186, 2.3792, 2.6779, 1.5153, 1.4215, 1.5121,
        0.1231, 2.4616, 0.1045, 0.0853, 2.2903, 2.5784, 0.0123, 2.6781, 1.1079,
        1.2148, 1.4405, 1.6577, 1.9348, 2.1631, 1.8764, 0.9698, 0.2792, 2.2116,
        2.2410, 2.8711, 1.1760, 2.9956, 0.1928, 0.4002, 2.0546, 1.2711, 2.3422,
        3.1148, 1.1629, 2.0339, 1.6107, 1.1515, 3.1112, 1.0367, 0.9851, 2.4173,
        0.6416, 0.7772, 2.7525, 0.0611, 2.4794, 1.6451, 2.9996, 1.1254, 0.2746,
        1.3886, 1.7065, 2.1581, 0.9915, 1.1825, 0.9547, 2.6626, 0.3753, 2.0896,
        1.9502, 0.3263, 2.2918, 1.2987, 0.9476, 0.2290, 1.0726, 1.8689, 2.0133,
        1.1785, 0.1544, 0.9936, 1.3871, 1.7706, 2.1571, 1.0286, 1.7858, 0.3325,
        0.7180, 2.6164, 0.0172, 0.8346, 0.3791, 1.6737, 1.7010, 2.0426, 0.5623,
        0.3951, 0.8667, 2.9115, 1.8327, 0.9459, 2.2860, 2.1892, 0.0990, 0.8333,
        2.1960, 0.8727, 3.0540, 0.0751, 

In [31]:
# Number of entries in w1
w1.shape[0]

512

#### Forward pass and loss computation

In [8]:
# weight matrix W1
W1 = torch.zeros(M,N, device = dev)
for m in range(M):
    W1[m,0] = a1
    for n in range(1,N):
        W1[m,n] = a2 * torch.cos(w1[m]*n)
W1

tensor([[ 0.5454,  0.0329,  0.0170,  ...,  0.0117, -0.0096, -0.0281],
        [ 0.5454,  0.0174, -0.0231,  ..., -0.0385, -0.0131,  0.0267],
        [ 0.5454, -0.0194, -0.0194,  ..., -0.0316,  0.0352, -0.0036],
        ...,
        [ 0.5454,  0.0366,  0.0303,  ..., -0.0195, -0.0295, -0.0362],
        [ 0.5454,  0.0386,  0.0380,  ..., -0.0143, -0.0179, -0.0213],
        [ 0.5454,  0.0319,  0.0136,  ...,  0.0283,  0.0081, -0.0149]],
       device='cuda:0', grad_fn=<CopySlices>)

In [9]:
# frequency domain X
X = torch.matmul(W1,x)/np.sqrt(N)
X.dtype

torch.float32

In [10]:
# Shape of X
X.shape

torch.Size([512, 100])

In [11]:
# Largest entry of X, computed on a detached CPU copy
X.detach().cpu().numpy().max()

0.07877216

In [12]:
# weight matrix W2_1 with same frequency components w1
W2_1 = torch.zeros(N,M, device = dev)
for m in range(M):
    W2_1[0,m] = a3
    for n in range(1,N):
        W2_1[n,m] = a4 * torch.cos(w1[m]*n)
W2_1

tensor([[ 0.7586,  0.7586,  0.7586,  ...,  0.7586,  0.7586,  0.7586],
        [ 0.0551,  0.0292, -0.0324,  ...,  0.0613,  0.0646,  0.0533],
        [ 0.0285, -0.0387, -0.0326,  ...,  0.0507,  0.0636,  0.0228],
        ...,
        [ 0.0196, -0.0645, -0.0529,  ..., -0.0326, -0.0239,  0.0474],
        [-0.0161, -0.0220,  0.0590,  ..., -0.0494, -0.0300,  0.0136],
        [-0.0470,  0.0447, -0.0060,  ..., -0.0605, -0.0357, -0.0250]],
       device='cuda:0', grad_fn=<CopySlices>)

In [13]:
# weight matrix W2_2 with different frequency components w2
W2_2 = torch.zeros(N,M, device = dev)
for m in range(M):
    W2_2[0,m] = a3
    for n in range(1,N):
        W2_2[n,m] = a4 * torch.cos(w2[m]*n)
W2_2

tensor([[ 0.7586,  0.7586,  0.7586,  ...,  0.7586,  0.7586,  0.7586],
        [ 0.0474, -0.0443, -0.0247,  ...,  0.0396,  0.0503, -0.0637],
        [ 0.0043, -0.0044, -0.0461,  ..., -0.0166,  0.0129,  0.0602],
        ...,
        [ 0.0619,  0.0500,  0.0519,  ...,  0.0562, -0.0648,  0.0637],
        [ 0.0586, -0.0644,  0.0162,  ...,  0.0085, -0.0523, -0.0602],
        [ 0.0238,  0.0379, -0.0643,  ..., -0.0458, -0.0161,  0.0544]],
       device='cuda:0', grad_fn=<CopySlices>)

In [14]:
# Apply W2_1 to X, scale by 1/sqrt(N), and show the largest entry of the result
y = (W2_1 @ X) / np.sqrt(N)
y.detach().cpu().numpy().max()

0.40914237

In [15]:
# Shape of y
y.shape

torch.Size([512, 100])

In [16]:
# Mean squared difference between x and y
loss = (x - y).square().mean()
loss

tensor(0.3331, device='cuda:0', grad_fn=<MeanBackward0>)

In [17]:
# Backpropagate the loss so that .grad is populated on the tensors it depends on.
loss.backward()

In [25]:
# Inspect the gradient of the loss with respect to a4.
a4.grad

tensor([-0.0130], device='cuda:0')

In [19]:
# Step size for the manual gradient-descent update of w1.
lr = 1

In [20]:
# One manual gradient-descent step on w1, outside of autograd tracking,
# followed by clearing its accumulated gradient.
with torch.no_grad():
    w1.sub_(lr * w1.grad)
    w1.grad.zero_()

In [51]:
# NOTE(review): `epoch` is not defined in any visible cell; this snippet
# presumably belongs inside a training loop that supplies it — TODO confirm.
if epoch % 2 == 0:
    # a1 has shape (1,), and a float format spec is only supported on 0-dim
    # tensors, so convert to Python scalars before formatting.
    print(f'epoch{epoch+1}: a1 = {a1.item():.3f}, loss = {loss.item():.8f}')

SyntaxError: EOL while scanning string literal (<ipython-input-51-51ffada361be>, line 2)

In [21]:
# Show w1 after the update step; loss is not recomputed here, so it is still
# the value from the forward pass that preceded the update.
print(f'w1 = {w1}, loss = {loss}')

w1 = tensor([0.5578, 1.1050, 2.0936, 2.1582, 1.2282, 2.9904, 0.5599, 2.5849, 1.3103,
        1.7072, 0.9475, 0.4680, 2.0186, 2.3791, 2.6778, 1.5151, 1.4215, 1.5121,
        0.1232, 2.4615, 0.1044, 0.0854, 2.2905, 2.5783, 0.0126, 2.6780, 1.1080,
        1.2150, 1.4406, 1.6578, 1.9349, 2.1632, 1.8766, 0.9697, 0.2795, 2.2118,
        2.2412, 2.8711, 1.1761, 2.9956, 0.1928, 0.4003, 2.0546, 1.2710, 2.3422,
        3.1148, 1.1629, 2.0339, 1.6108, 1.1517, 3.1112, 1.0369, 0.9851, 2.4171,
        0.6416, 0.7770, 2.7524, 0.0611, 2.4791, 1.6450, 2.9998, 1.1256, 0.2742,
        1.3886, 1.7065, 2.1580, 0.9913, 1.1825, 0.9546, 2.6626, 0.3753, 2.0896,
        1.9503, 0.3263, 2.2919, 1.2989, 0.9477, 0.2290, 1.0727, 1.8689, 2.0135,
        1.1785, 0.1546, 0.9936, 1.3871, 1.7707, 2.1572, 1.0285, 1.7857, 0.3325,
        0.7179, 2.6163, 0.0171, 0.8345, 0.3793, 1.6738, 1.7009, 2.0428, 0.5623,
        0.3952, 0.8667, 2.9115, 1.8327, 0.9459, 2.2857, 2.1890, 0.0991, 0.8331,
        2.1962, 0.8726, 3.0540, 0.0

In [22]:
# Inspect the gradient of the loss with respect to a2.
a2.grad

tensor([-0.0217], device='cuda:0')

In [None]:
# randperm