In [1]:
import sys

import torch
import torch.nn as nn
import torch.nn.functional as F
import torch.optim as optim
from torch.utils.data import DataLoader, TensorDataset
from torch.autograd import grad

import einops
import math

sys.path.append('../python')
%load_ext autoreload
%autoreload 2
from Lorentz_loss import LorentzLoss
sys.path.append('..')


 






## A very simple test of what we've done so far

We define a Lorentz-scalar function:


$$
f(x) = \frac{1}{\sqrt{20}}\left(z+\frac{z^2}{4}-1\right)+1\;\;,\;\;z = x_\mu\eta^{\mu\nu}x_\nu = t^2-\boldsymbol{x}^2
$$

This form is chosen so that, when the components $x_\mu$ are drawn independently from ${\cal N}(0,1)$, $f(x)$ is a random variable with mean 1 and variance 1.

In [77]:
def scalar_val(x):
    """Evaluate the Lorentz-scalar test function f on a tensor of 4-vectors.

    With z = x_mu eta^{mu nu} x_nu = t^2 - |x|^2 (metric signature +,-,-,-),
    the returned value is f = (z + z^2/4 - 1) / sqrt(20) + 1.

    Args:
        x: tensor of shape [..., 4]; the last axis holds the four components
           of each 4-vector, time component first.

    Returns:
        Tensor of shape ``x.shape[:-1]`` holding f for every 4-vector.

    Raises:
        ValueError: if the last dimension of ``x`` is not 4.
    """
    if x.shape[-1] != 4:
        raise ValueError('last dim should be 4')
    # Build the metric from x so it shares x's dtype and device; a bare
    # torch.tensor([...]) would be an int64 CPU tensor and break the
    # contraction for inputs that live on another device.
    eta = x.new_tensor([1, -1, -1, -1])
    # z = sum_i x_i * eta_i * x_i over the last axis.
    xx = torch.einsum('... i, i,... i -> ... ', x, eta, x)
    return (xx + xx**2/4-1) / math.sqrt(20)+1




In [96]:
# Generate random 4-vectors with their labels, then check the label mean/variance.
torch.manual_seed(0)  # fixed seed so the sampled data and the statistics below are reproducible
num_samples = 100     # leading dimension: number of dataset entries
num_vec = 1000        # number of 4-vectors per dataset entry
data = torch.randn([num_samples, num_vec, 4])  # shape [100, 1000, 4]
labels = scalar_val(data)                      # shape [100, 1000]

# Reuse `labels` rather than evaluating scalar_val(data) a second time.
torch.var_mean(labels), data.shape, labels.shape

((tensor(0.9963), tensor(1.0042)),
 torch.Size([100, 1000, 4]),
 torch.Size([100, 1000]))

In [110]:
# a model

class Block(nn.Module):
    """Fully connected layer followed by a SELU non-linearity."""

    def __init__(self, d_in, d_out, bias=True):
        """Create the linear map from ``d_in`` to ``d_out`` features.

        ``bias`` is forwarded unchanged to ``nn.Linear``.
        """
        super().__init__()
        self.fc = nn.Linear(in_features=d_in, out_features=d_out, bias=bias)

    def forward(self, x):
        """Return ``selu(fc(x))``."""
        return F.selu(self.fc(x))

class CrazyModel(nn.Module):
    """Small MLP mapping every 4-vector to a single scalar.

    The network is a stack of ``Block`` layers: 4 -> d_hid, then ``n_hid``
    layers d_hid -> d_hid, then d_hid -> 1.

    Args:
        n_hid: number of hidden (d_hid -> d_hid) blocks.
        d_hid: width of the hidden layers.
    """

    def __init__(self,n_hid=2,d_hid=10):
        super().__init__()
        modules = [Block(4, d_hid)]
        modules += [Block(d_hid,d_hid) for _ in range(n_hid)]
        modules += [Block(d_hid,1)]

        self.blocks = nn.Sequential(*modules)

    def forward(self, x):
        """Map ``x`` of shape [..., 4] to predictions of shape ``x.shape[:-1]``."""
        # Drop only the trailing size-1 feature axis. A bare .squeeze() would
        # also remove any other axis of length 1 (e.g. a batch of size 1 or a
        # single 4-vector per entry), changing the output rank and misaligning
        # it with the labels.
        return self.blocks(x).squeeze(-1)



In [115]:
# Initialize model, loss, and optimizer
model = CrazyModel()
model.train()
criterion = nn.MSELoss(reduction='mean')
cirterion = criterion  # original (misspelled) name, kept because the training cell refers to it

learning_rate = 3e-2
weight_decay = 1e-3
weight_deacy = weight_decay  # original (misspelled) name, kept for backward compatibility

optimizer = optim.SGD(model.parameters(), lr=learning_rate, weight_decay=weight_decay)

# Data loader: each dataset entry is one row of `data` paired with its row of `labels`.
batch_size = 64  # number of dataset entries per batch
dataset = TensorDataset(data, labels)
dataloader = DataLoader(dataset, batch_size=batch_size, shuffle=True)




In [116]:
# Training loop: repeated full passes over the dataloader with periodic loss logging
iters = 10000

# The loop variable is named `epoch` rather than `iter` so that Python's
# builtin iter() is not shadowed in the notebook namespace.
for epoch in range(iters):
    for batch_data, batch_labels in dataloader:
        preds = model(batch_data)
        loss = cirterion(preds, batch_labels)

        optimizer.zero_grad()
        loss.backward()
        optimizer.step()

    # Log at passes 100 and 500, then every 1000 passes. The printed value is
    # the loss of the last batch of the pass, not an average over the pass.
    if epoch in (100, 500) or (epoch > 0 and epoch % 1000 == 0):
        print(f'iter: {epoch}\t|\tloss: {loss.item()}')


iter: 100	|	loss: 0.9478254914283752
iter: 500	|	loss: 0.5768612027168274
iter: 1000	|	loss: 0.4849708080291748
iter: 2000	|	loss: 0.40425583720207214
iter: 3000	|	loss: 0.27159059047698975
iter: 4000	|	loss: 0.18058785796165466
iter: 5000	|	loss: 0.1503157615661621
iter: 6000	|	loss: 0.14825136959552765
iter: 7000	|	loss: 0.13348020613193512
iter: 8000	|	loss: 0.13562260568141937
iter: 9000	|	loss: 0.128230020403862
