In [1]:
import torch
from torch import nn

import numpy as np
import numpy.random as random
import random
import math

In [2]:
# initial settings

# torch.cuda.is_available() checks and returns a Boolean True if a GPU is available, else it'll return False
is_cuda = torch.cuda.is_available()

# If we have a GPU available, we'll set our device to GPU. We'll use this device variable later in our code.
if is_cuda:
    device = torch.device("cuda")
    print("GPU is available")
else:
    device = torch.device("cpu")
    print("GPU not available, CPU used")
random.seed(1234)
np.random.seed(0)
torch.manual_seed(1234)

GPU not available, CPU used


<torch._C.Generator at 0x7f8cf027fa10>

## 1. Build the 2D RNN
- tensorized RNN cell as in Hibat-Allah 2021

In [3]:
class TensorizedGRU(nn.Module):
    """ Custom GRU layer for 2D input """
    def __init__(self, input_size, hidden_size):
        super().__init__()
        
        self.input_size  = input_size
        self.hidden_size = hidden_size
        self.sigmoid = torch.nn.Sigmoid()
        self.tanh    = torch.nn.Tanh()
        
          
        # define all weights
        w1      = torch.empty(self.hidden_size, 2*self.hidden_size, 2*self.input_size)
        self.W1 = nn.Parameter(w1)  # nn.Parameter is a Tensor that's a module parameter.
        b1      = torch.empty(self.hidden_size)
        self.b1 = nn.Parameter(b1)
        
        w2      = torch.empty(self.hidden_size, 2*self.hidden_size, 2*self.input_size)
        self.W2 = nn.Parameter(w2)  
        b2      = torch.empty(self.hidden_size)
        self.b2 = nn.Parameter(b2)
        
        w3      = torch.empty(2*self.hidden_size, self.hidden_size)
        self.W3 = nn.Parameter(w3) 

        self.reset_parameters()
        
    def reset_parameters(self):
        nn.init.xavier_uniform_(self.W1, 1)
        nn.init.xavier_uniform_(self.W2, 1)
        nn.init.xavier_uniform_(self.W3, 1)
        fan_in, fan_out = nn.init._calculate_fan_in_and_fan_out(self.W1)
        lim = np.sqrt(3.0 / (0.5*(fan_in+fan_out)))
        nn.init.uniform_(self.b1, -lim, lim)
        fan_in, fan_out = nn.init._calculate_fan_in_and_fan_out(self.W2) 
        lim = np.sqrt(3.0 / (0.5*(fan_in+fan_out)))
        nn.init.uniform_(self.b2, -lim, lim)
   

    def forward(self, inputs, states):
        if len(inputs[0].size()) == 3:
            inputs[0] = inputs[0][:,0,:]
        if len(inputs[1].size()) == 3:
            inputs[1] = inputs[1][:,0,:]

        inputstate_mul = torch.einsum('ij,ik->ijk', torch.concat((states[0], states[1]), 1),torch.concat((inputs[0], inputs[1]),1))
        # prepare input linear combination
        state_mul1 = torch.einsum('ijk,ljk->il', inputstate_mul, self.W1) # [batch_sz, num_units]
        state_mul2 = torch.einsum('ijk,ljk->il', inputstate_mul, self.W2) # [batch_sz, num_units]

        u = self.sigmoid(state_mul2 + self.b2)
        state_tilda = self.tanh(state_mul1 + self.b1) 

        new_state = u*state_tilda 
        new_state += (1.-u)*torch.einsum('ij,jk->ik', torch.concat((states[0], states[1]), 1), self.W3)
        output = new_state
        return output, new_state



class Model(nn.Module):
    def __init__(self, input_size, system_size_x, system_size_y, hidden_dim, n_layers, sz_tot = None):
        super(Model, self).__init__()
        """
        Creates RNN consisting of GRU cells.
        Inputs:
            - input_size:  number of quantum numbers (i.e. 2 for spin-1/2 particles)
            - system_size: length of each snapshot
            - hidden_dim:  dimension of hidden states
            - n_layers:    number of layers of the GRU
        """

        # Defining some parameters
        self.input_size  = input_size    # number of expected features in input data
        self.output_size = input_size    # number of expected features in output data
        self.N_x         = system_size_x # length of generated samples in x dir
        self.N_y         = system_size_y # length of generated samples in x dir
        self.hidden_dim  = hidden_dim    # number of features in the hidden state
        self.n_layers    = n_layers      # number of stacked GRUs
        self.sz_tot      = sz_tot        # total magnetization if u(1) symmetry is applied (default: None)
        self.system_size = system_size_x*system_size_y
        #Defining the layers
        self.rnn  = TensorizedGRU(self.input_size, hidden_dim)   
        self.lin1 = nn.Linear(hidden_dim, self.output_size)
        self.lin2 = nn.Linear(hidden_dim, self.output_size)
        #self.s    = torch.softmax(dim=0)
        self.soft = nn.Softsign()
        
        self.get_num_parameters()
        
    def forward(self, x, hidden):
        """
        Passes the input through the network.
        Inputs:
            - x:      input state at t
            - hidden: hidden state at t
        Outputs:
            - out:    output configuration at t+1
            - hidden: hidden state at t+1
        """
        
        # Passing in the input and hidden state into the model and obtaining outputs
        out, hidden = self.rnn(x, hidden)
        
        # Reshaping the outputs such that it can be fit into the dense layer
        out = out.contiguous().view(-1, self.hidden_dim)
        
        return out, hidden
    
    def init_hidden(self, batch_size):
        """
        Generates the hidden state for a given batch size.
        """
        # This method generates the first hidden state of zeros for the forward pass and passes it to the device.
        # This is equivalent to a product state.
        hidden = torch.zeros((batch_size, self.hidden_dim), dtype=torch.float64).to(device)
        return hidden
    
    def get_num_parameters(self):
        """
        Calculates the number of parameters of the network. """
        p = 0
        for param in list(self.parameters()):
            if param.requires_grad:
                p += param.numel()
        print("Total number of parameters in the network: "+str(p))
        return p
    
    def _gen_samples(self, nx, ny, direction, inputs, hidden_inputs, numsamples):
        # pass the hidden unit and sigma into the GRU cell at t=i 
        # and get the output y (will be used for calculating the 
        # probability) and the next hidden state
        full_sigma = [inputs[str(nx+direction[0])+str(ny)],inputs[str(nx)+str(ny+direction[1])]]
        hidden     = [hidden_inputs[str(nx+direction[0])+str(ny)],hidden_inputs[str(nx)+str(ny+direction[1])]]
        y, hidden  = self.forward(full_sigma, hidden)
        # the amplitude is given by a linear layer with a softmax activation
        ampl = self.lin1(y)
        ampl = torch.softmax(ampl,dim=1) # amplitude, all elements in a row sum to 1
        # the phase is given by a linear layer with a softsign activation
        phase = self.lin2(y)
        phase = self.soft(phase) 
        # samples are obtained by sampling from the amplitudes
        sample = torch.multinomial(ampl, 1) 
        # one hot encode the current sigma to pass it into the GRU at
        # the next time step
        sigma = nn.functional.one_hot(sample, 2).double()
        
        return sample[:,0], sigma, ampl, torch.mul(torch.pi,phase), hidden
    
    
    def sample(self, num_samples):
        """
        Generates num_samples samples from the network and returns the samples,
        their log probabilities and phases.
        """
        # generate a first input of zeros (sigma and hidden states) to the first GRU cell at t=0
        sigma       = torch.zeros((num_samples,2), dtype=torch.float64).to(device)
        inputs = {}
        hidden_inputs = {}
        for ny in range(-1, self.N_y): # add a padding for the inputs and hidden states
            for nx in range(-1, self.N_x+1):
                inputs[str(nx)+str(ny)] = sigma
                hidden_inputs[str(nx)+str(ny)] = self.init_hidden(num_samples)
                
        samples     = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        ampl_probs  = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        phase_probs = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        ohs         = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        for ny in range(self.N_y):
            if ny % 2 == 0: #go from left to right
                for nx in range(self.N_x):
                    direction = [-1,-1]
                    samples[nx][ny], sigma, ampl_probs[nx][ny], phase_probs[nx][ny], hidden_inputs[str(nx)+str(ny)] = self._gen_samples(nx, ny, direction, inputs, hidden_inputs, num_samples)
                    inputs[str(nx)+str(ny)] = sigma
                    ohs[nx][ny] = sigma
            else: #go from right to left
                for nx in range(self.N_x-1, -1, -1):
                    direction = [1,-1]
                    samples[nx][ny], sigma, ampl_probs[nx][ny], phase_probs[nx][ny], hidden_inputs[str(nx)+str(ny)] = self._gen_samples(nx, ny, direction, inputs, hidden_inputs, num_samples)
                    inputs[str(nx)+str(ny)] = sigma
                    ohs[nx][ny] = sigma
                    
        samples = torch.stack([torch.stack(s, axis=1) for s in samples], axis=1) #.reshape((num_samples, self.N_x, self.N_y))
        ampl_probs = torch.cat([torch.stack(a, axis=1) for a in ampl_probs], axis=1) #.reshape((num_samples, self.N_x*self.N_y, 2))
        phase_probs = torch.cat([torch.stack(p, axis=1) for p in phase_probs], axis=1) #.reshape((num_samples, self.N_x*self.N_y, 2))
        ohs = torch.cat([torch.cat(o, axis=1) for o in ohs], axis=1) #.reshape((num_samples, self.N_x*self.N_y, 2))
        # calculate the wavefunction and split it into amplitude and phase
        log_probs_ampl = torch.sum(torch.log(torch.sum(torch.torch.multiply(ampl_probs,ohs), axis =2)), axis=1)
        phase = torch.sum((torch.sum(torch.torch.multiply(phase_probs,ohs), axis =2)), axis=1)
        return samples, log_probs_ampl, phase
    
    def _gen_probs(self, nx, ny, direction, sample, inputs, hidden_inputs):
        # pass the hidden unit and sigma into the GRU cell at t=i 
        # and get the output y (will be used for calculating the 
        # probability) and the next hidden state
        full_sigma = [inputs[str(nx+direction[0])+str(ny)],inputs[str(nx)+str(ny+direction[1])]]
        hidden     = [hidden_inputs[str(nx+direction[0])+str(ny)],hidden_inputs[str(nx)+str(ny+direction[1])]]
        y, hidden  = self.forward(full_sigma, hidden)
        # the amplitude is given by a linear layer with a softmax activation
        ampl = self.lin1(y)
        ampl = torch.softmax(ampl,dim=1) # amplitude, all elements in a row sum to 1
        # the phase is given by a linear layer with a softsign activation
        phase = self.lin2(y)
        phase = self.soft(phase) 
        # one hot encode the current sigma to pass it into the GRU at
        # the next time step
        sigma = nn.functional.one_hot(sample.reshape((sample.size()[0],1)), 2).double()
        
        return sigma, ampl, torch.mul(torch.pi,phase), hidden
    
    def log_probabilities(self, samples):
        """
        Calculates the log probability and the phase of each item in samples.
        """
        # reshape samples
        num_samples = samples.size()[0]
        samples = samples.clone().detach()
        samples = [[samples[:,nx,ny] for ny in range(self.N_y)] for nx in range(self.N_x)]
        
        # generate a first input of zeros (sigma and hidden states) to the first GRU cell at t=0
        sigma  = torch.zeros((num_samples,2), dtype=torch.float64).to(device)
        inputs = {}
        hidden_inputs = {}
        for ny in range(-1, self.N_y):
            for nx in range(-1, self.N_x+1):
                inputs[str(nx)+str(ny)] = sigma
                hidden_inputs[str(nx)+str(ny)] = self.init_hidden(num_samples)

        ampl_probs  = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        phase_probs = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        ohs         = [[[] for ny in range(self.N_y)] for nx in range(self.N_x)]
        for ny in range(self.N_y):
            if ny % 2 == 0: #go from left to right
                for nx in range(self.N_x):
                    direction = [-1,-1]
                    sigma, ampl_probs[nx][ny], phase_probs[nx][ny], hidden_inputs[str(nx)+str(ny)] = self._gen_probs(nx, ny, direction, samples[nx][ny], inputs, hidden_inputs)
                    ohs[nx][ny] = sigma
                    inputs[str(nx)+str(ny)] = sigma
            else: #go from right to left
                for nx in range(self.N_x-1, -1, -1):
                    direction = [1,-1]
                    sigma, ampl_probs[nx][ny], phase_probs[nx][ny], hidden_inputs[str(nx)+str(ny)] = self._gen_probs(nx, ny, direction, samples[nx][ny], inputs, hidden_inputs)
                    ohs[nx][ny] = sigma
                    inputs[str(nx)+str(ny)] = sigma
        ampl_probs = torch.cat([torch.stack(a, axis=1) for a in ampl_probs], axis=1) #.reshape((num_samples, self.N_x*self.N_y, 2))
        phase_probs = torch.cat([torch.stack(p, axis=1) for p in phase_probs], axis=1) #.reshape((num_samples, self.N_x*self.N_y, 2))
        ohs = torch.cat([torch.cat(o, axis=1) for o in ohs], axis=1) #.reshape((num_samples, self.N_x*self.N_y, 2))
        # calculate the wavefunction and split it into amplitude and phase
        log_probs_ampl = torch.sum(torch.log(torch.sum(torch.torch.multiply(ampl_probs,ohs), axis =2)), axis=1)
        phase = torch.sum((torch.sum(torch.torch.multiply(phase_probs,ohs), axis =2)), axis=1)
        return log_probs_ampl, phase
        

In [4]:
Nx = 2
Ny = 2
hiddendim  = 15
numsamples = 10

# Instantiate the model with hyperparameters
model = Model(input_size=2, system_size_x=Nx, system_size_y = Ny, hidden_dim=hiddendim, n_layers=1, sz_tot=0)
# We'll also set the model to the device that we defined earlier (default is CPU)
model = model.to(device)
print(model)
model = model.double()
for p in list(model.parameters()):
    print(p.size())
    print(p)

Total number of parameters in the network: 4144
Model(
  (rnn): TensorizedGRU(
    (sigmoid): Sigmoid()
    (tanh): Tanh()
  )
  (lin1): Linear(in_features=15, out_features=2, bias=True)
  (lin2): Linear(in_features=15, out_features=2, bias=True)
  (soft): Softsign()
)
torch.Size([15, 30, 4])
Parameter containing:
tensor([[[-1.7199e-01, -3.5822e-02, -8.7693e-02, -4.8696e-02],
         [-1.6129e-01,  7.3265e-02, -1.6366e-01, -1.1634e-02],
         [ 6.3465e-02, -6.1541e-02,  1.0360e-01,  2.3027e-02],
         ...,
         [-1.5881e-01, -1.8020e-01,  1.2630e-03, -7.0050e-02],
         [-4.5944e-02, -2.5674e-02,  1.7266e-01,  1.7306e-01],
         [-1.7070e-02, -5.4824e-02,  8.8665e-02, -1.4569e-02]],

        [[-1.7372e-01,  5.9513e-02,  1.7479e-01, -1.4472e-01],
         [-3.9431e-02,  3.9594e-02, -1.4867e-01, -1.5631e-01],
         [ 1.3630e-01, -6.4649e-02,  1.0650e-01,  1.6322e-02],
         ...,
         [-1.6735e-01, -1.7476e-01,  6.1941e-02, -1.7032e-01],
         [-1.4962e-01,  

In [5]:
#test the sampling method
samples, log_probs, phase = model.sample(1000)

samples = torch.unique(samples, dim=0)
print(samples.size())

2**(Nx*Ny)

torch.Size([16, 2, 2])


16

In [6]:
# test the probability method
log_probs, phases = model.log_probabilities(samples)
print(log_probs.size())
print(phases.size())
print(torch.sum(torch.mul(torch.exp(0.5*log_probs+1j*phases),torch.exp(0.5*log_probs+1j*phases).conj())))


torch.Size([16])
torch.Size([16])
tensor(1.0000+0.j, dtype=torch.complex128, grad_fn=<SumBackward0>)


### 2. Calculate the matrix elements (here 2D XXZ model)

$$ E_{\theta}^{loc}(x) = \frac{<x|H|\psi_\theta>}{<x|\psi_\theta>} = H_{diag}(x)+H_{offd}(x)\frac{<x^{\prime}|\psi_\theta>}{<x|\psi_\theta>} $$
with $\hat{H}_{offd}|x^{\prime}>=H_{offd}(x)|x^{\prime}>$ and ${<x|\psi_\theta>}$ given by the square root of the exponential of model.log_probabilities(x) defined above.

- for $J_p = 0$ and $J_z = 1$: $E_{loc} = H_{diag}(x) = 0.25*J_z*systemsize$

In [45]:
def XXZ1D_MatrixElements(Jp, Jz, samples, length):
    """ 
    Calculate the local energies of 1D XXZ model given a set of set of samples.
    Returns: The local energies that correspond to the input samples.
    Inputs:
    - sample: (num_samples, N)
    - Jp: float
    - Jz: float
    """

    N = samples.size()[1]
    numsamples = samples.size()[0]
    
    #diagonal elements
    diag_matrixelements = torch.zeros((numsamples, length))
    #diagonal elements from the SzSz term 
    for i in range(length): 
        values  = samples[:,i]+samples[:,(i+1)%N]
        valuesT = values.clone()
        valuesT[values==2] = +1 #If both spins are up
        valuesT[values==0] = +1 #If both spins are down
        valuesT[values==1] = -1 #If they are opposite
        diag_matrixelements[:,i] = valuesT.reshape((numsamples))*Jz*0.25
    
    #off-diagonal elements from the S+S- terms
    offd_matrixelements = torch.zeros((numsamples, length))
    xprime = []
    for i in range(length): 
        values = samples[:,i]+samples[:,(i+1)%N]
        valuesT = values.clone()
        #flip the spins
        new_samples             = samples.clone()
        new_samples[:,(i+1)%N]  = samples[:,i]
        new_samples[:,i]        = samples[:,(i+1)%N]
        valuesT[values==2]      = 0 #If both spins are up
        valuesT[values==0]      = 0 #If both spins are down
        valuesT[values==1]      = 1 #If they are opposite
        offd_matrixelements[:,i] = valuesT.reshape((numsamples))*Jp*0.5
        xprime.append(new_samples)
    return diag_matrixelements, offd_matrixelements, torch.stack(xprime, axis=0)

def XXZ1D_Eloc(Jp, Jz, samples, RNN, boundaries):
    """ 
    Calculate the local energies of 1D XXZ model given a set of set of samples.
    Returns: The local energies that correspond to the input samples.
    Inputs:
    - sample: (num_samples, N)
    - Jp: float
    - Jz: float
    - boundaries: str, open or periodic
    """

    N          = samples.size()[1]
    numsamples = samples.size()[0]
    if boundaries == "periodic":
        length = N
    else:
        length = N-1
    
    queue_samples       = torch.zeros((length+1, numsamples, N, 1), dtype = torch.int32) 
    log_probs           = np.zeros((length+1)*numsamples, dtype=np.float64) 
    
    #matrix elements
    diag_me, offd_me, new_samples = XXZ1D_MatrixElements(Jp, Jz, samples, length)
    diag_me = torch.sum(diag_me, axis=1)
    offd_me = offd_me.to(torch.complex64)
    # diagonal elements
    queue_samples[0] = samples
    Eloc = diag_me.to(torch.complex64)
    #off-diagonal elements
    
    offd_Eloc = np.zeros((numsamples), dtype = np.float64)
    queue_samples[1:] = new_samples
    queue_samples_reshaped = np.reshape(queue_samples, [(length+1)*numsamples, N, 1])
    log_probs, phases = model.log_probabilities(queue_samples_reshaped.to(torch.int64))
    log_probs_reshaped = torch.reshape(log_probs, (length+1,numsamples)).to(torch.complex64)
    phases_reshaped = torch.reshape(phases, (length+1,numsamples))
    for i in range(1,length+1):
        tot_log_probs = 0.5*(log_probs_reshaped[i,:]-log_probs_reshaped[0,:])
        tot_log_probs += 1j*(phases_reshaped[i,:]-phases_reshaped[0,:])
        Eloc += offd_me[:,i-1]*(torch.exp(tot_log_probs))
    return Eloc
    
def XXZ2D_MatrixElements(Jp, Jz, samples, length_x, length_y):
    """ 
    Calculate the local energies of 2D XXZ model given a set of set of samples.
    Returns: The local energies that correspond to the input samples.
    Inputs:
    - samples: (num_samples, N)
    - Jp: float
    - Jz: float
    - length_x: system length in x dir
    - length_y: system length in y dir
    """

    Nx         = samples.size()[1]
    Ny         = samples.size()[2]
    numsamples = samples.size()[0]
    
    #diagonal elements
    diag_matrixelements = torch.zeros((numsamples))
    #diagonal elements from the SzSz term 
    for n in range(numsamples):
        for i in range(Nx): 
            for j in range(Ny):
                if i != length_x:
                    if samples[n,i,j] != samples[n,(i+1)%Nx,j]:
                        diag_matrixelements[n] += -Jz*0.25
                    else:
                        diag_matrixelements[n] += Jz*0.25
                if j != length_y:
                    if samples[n,i,j] != samples[n,i,(j+1)%Ny]:
                        diag_matrixelements[n] += -Jz*0.25
                    else:
                        diag_matrixelements[n] += Jz*0.25
    
    #off-diagonal elements from the S+S- terms
    offd_matrixelements = torch.zeros((numsamples, length_x*length_y*2))
    xprime = torch.zeros((length_x*length_y*2, numsamples, Nx, Ny))
    if Jp!=0:
        for n in range(numsamples):
            num = 0
            for i in range(length_x): 
                for j in range(length_y):
                    new_sample = samples[n].clone()
                    if i != length_x:
                        if samples[n,i,j] != samples[n,(i+1)%Nx,j]:
                            new_sample[(i+1)%Nx,j]   = samples[n,i,j]
                            new_sample[i,j]          = samples[n,(i+1)%Nx, j]
                            offd_matrixelements[n,num] += Jp*0.5
                            xprime[num,n]=new_sample
                            num +=1
                    if j != length_y:
                        if samples[n,i,j] != samples[n,i,(j+1)%Ny]:
                            new_sample[i,(j+1)%Ny]   = samples[n,i,j]
                            new_sample[i,j]          = samples[n,i,(j+1)%Ny]
                            offd_matrixelements[n,num] += Jp*0.5
                            xprime[num,n]=new_sample
                            num +=1
                    
    return diag_matrixelements, offd_matrixelements, xprime


def XXZ2D_Eloc(Jp, Jz, samples, RNN, boundaries):
    """ 
    Calculate the local energies of 2D XXZ model given a set of set of samples.
    Returns: The local energies that correspond to the input samples.
    Inputs:
    - sample: (num_samples, N)
    - Jp: float
    - Jz: float
    - RNN: RNN model
    - boundaries: str, open or periodic
    """

    Nx         = samples.size()[1]
    Ny         = samples.size()[2]
    numsamples = samples.size()[0]
    if boundaries == "periodic":
        length_x = Nx
        length_y = Ny
    elif "open":
        length_x = Nx-1
        length_y = Ny-1
    else:
        raise "Boundary "+boundaries+" not implemented"
    
    queue_samples       = torch.zeros((length_x*length_y*2+1, numsamples, Nx, Ny), dtype = torch.int32) 
    log_probs           = np.zeros((length_x*length_y*2+1)*numsamples, dtype=np.float64) 
    
    #matrix elements
    diag_me, offd_me, new_samples = XXZ2D_MatrixElements(Jp, Jz, samples, length_x, length_y)
    offd_me = offd_me.to(torch.complex64)
    # diagonal elements
    queue_samples[0] = samples
    Eloc = diag_me.to(torch.complex64)
    #off-diagonal elements
    if Jp != 0:
        offd_Eloc = np.zeros((numsamples), dtype = np.float64)
        queue_samples[1:] = new_samples
        queue_samples_reshaped = np.reshape(queue_samples, [(length_x*length_y*2+1)*numsamples, Nx, Ny])
        log_probs, phases = model.log_probabilities(queue_samples_reshaped.to(torch.int64))
        log_probs_reshaped = torch.reshape(log_probs, (length_x*length_y*2+1,numsamples)).to(torch.complex64)
        phases_reshaped = torch.reshape(phases, (length_x*length_y*2+1,numsamples))
        for i in range(1,length_x*length_y*2+1):
            tot_log_probs = 0.5*(log_probs_reshaped[i,:]-log_probs_reshaped[0,:])
            tot_log_probs += 1j*(phases_reshaped[i,:]-phases_reshaped[0,:])
            Eloc += offd_me[:,i-1]*(torch.exp(tot_log_probs))
    return Eloc

In [46]:
#simple tests
Jp = 1
Jz = 1
boundaries = "open"

samples, log_probs, phase = model.sample(10)
print(samples)
print("-------")
local_energy = XXZ2D_Eloc(Jp, Jz, samples, model, boundaries)
print(local_energy)
print(log_probs)



tensor([[[0, 0, 0],
         [1, 1, 0],
         [1, 0, 0]],

        [[0, 0, 1],
         [1, 1, 0],
         [1, 0, 0]],

        [[1, 1, 0],
         [0, 0, 0],
         [1, 0, 0]],

        [[0, 0, 1],
         [0, 1, 0],
         [0, 0, 0]],

        [[0, 0, 1],
         [1, 1, 0],
         [1, 0, 0]],

        [[1, 0, 0],
         [1, 0, 0],
         [1, 0, 0]],

        [[0, 0, 1],
         [1, 1, 0],
         [0, 0, 0]],

        [[1, 1, 1],
         [0, 0, 0],
         [1, 0, 0]],

        [[1, 0, 1],
         [0, 1, 0],
         [1, 0, 0]],

        [[1, 1, 0],
         [0, 0, 0],
         [1, 0, 0]]])
-------
tensor([-0.3813-0.0345j, -1.4499+0.0267j, -0.4516+0.0013j,  2.3735+0.0187j,
        -1.4499+0.0267j,  0.0762+0.0663j, -1.5102+0.0840j, -0.3739-0.0235j,
        -4.0164-0.0311j, -0.4516+0.0013j], grad_fn=<AddBackward0>)
tensor([-3.1093, -2.1320, -2.1251, -7.9958, -2.1320, -4.0673, -3.3062, -2.9953,
        -2.6132, -2.1251], dtype=torch.float64, grad_fn=<SumBackward1>)


### Train

In [47]:
random.seed(1234)
np.random.seed(0)
torch.manual_seed(4321)
random.seed(10)

In [91]:
# Define model parameters
Jp         = 1
Jz         = 1
Nx         = 4
Ny         = 4
bounds     = "open"

# Define hyperparameters
n_epochs   = 3000
lr         = 0.01
hidden_dim = 10

folder = "with_total_sz_cost/"

model = Model(input_size=2, system_size_x=Nx,system_size_y=Ny, hidden_dim=hiddendim, n_layers=1, sz_tot=None)
model = model.to(device)
model = model.double()
for p in list(model.parameters()):
    print(p)


# Optimizer and cost function
optimizer = torch.optim.Adam(model.parameters(), lr=lr)

def cost_fct(samples, model, Jp, Jz, log_probs, phases, boundaries, sz_tot=0):
    Eloc = XXZ2D_Eloc(Jp, Jz, samples, model, boundaries)
    log_psi = (0.5*log_probs+1j*phases)
    eloc_sum = (Eloc).mean(axis=0)
    e_loc_corr = (Eloc - eloc_sum).detach()
    if sz_tot != None:
        e_loc_corr += (get_sz_(samples).detach()-sz_tot*torch.ones((samples.size()[0])))**2
    cost = 2 * torch.real((torch.conj(log_psi) * e_loc_corr.to(torch.complex128))).mean(axis=0)
    return Eloc, cost



Total number of parameters in the network: 4144
Parameter containing:
tensor([[[ 1.0566e-01, -6.0767e-05,  1.1456e-01,  1.0755e-01],
         [-2.9062e-03, -1.0438e-01,  6.9534e-02,  1.6747e-01],
         [ 2.3354e-03,  2.1005e-02,  1.3408e-01,  1.4956e-01],
         ...,
         [ 1.3772e-01,  1.4667e-01,  1.4424e-02,  1.8132e-01],
         [ 4.1480e-02, -8.9484e-02,  1.7379e-01, -5.9462e-02],
         [ 1.1433e-01,  1.4143e-01, -1.7633e-01, -3.1094e-02]],

        [[-1.3762e-01,  9.8594e-02, -3.6308e-02,  1.6726e-01],
         [-1.6004e-01,  1.1517e-02,  8.8071e-02,  6.3117e-03],
         [ 1.4016e-01, -5.8708e-02,  1.4204e-01, -1.2518e-01],
         ...,
         [-1.5076e-01,  5.6153e-02, -1.0839e-01,  6.6478e-02],
         [-1.3062e-01, -1.0684e-01,  1.2778e-01,  1.6356e-01],
         [ 1.1809e-01, -1.0066e-01,  1.3641e-01,  9.7207e-03]],

        [[ 3.0131e-02, -3.5614e-02, -1.3568e-01, -1.5696e-01],
         [-5.2084e-02,  1.2624e-01, -7.4834e-02, -1.9129e-02],
         [-1.449

In [92]:
# observables that can be evaluated during the training or afterwards
def get_length(samples):
    Nx = samples.size()[1]
    Ny = samples.size()[2]
    if boundaries == "periodic":
        length_x = Nx
        length_y = Ny
    else:
        length_x = Nx-1
        length_y = Ny-1
    return Nx, Ny, length_x, length_y


def get_szsz(samples, log_probs, boundaries):
    Nx, Ny, length_x, length_y = get_length(samples)
    szsz = torch.zeros((samples.size()[0], length_x, length_y))
    s = samples.clone().detach() 
    s[samples == 0] = -1
    for i in range(length_x):
        for j in range(length_y):
            szsz[:,i,j] = s[:,i,j]*s[:,(i+1)%Nx,j]
            szsz[:,i,j] += s[:,i,j]*s[:,i,(j+1)%Ny]
    return torch.mean(szsz, axis=0)*1/4

def get_sxsx(samples, log_probs, phases, boundaries):
    Nx, Ny, length_x, length_y = get_length(samples)
    sxsx = torch.zeros((samples.size()[0], length_x, length_y))
    for i in range(length_x):
        for j in range(length_y):
            for d in [[1,0], [0,1]]:
                s1 = flip_neighbor_spins(samples, i, j, d, Nx, Ny)
                log_probs1, phases1 = model.log_probabilities(s1)
                sxsx[:,i,j] += torch.real(torch.exp(0.5*(log_probs1-log_probs))*torch.exp(1j*(phases1-phases)))
    return torch.mean(sxsx, axis=0)*1/4

def get_sysy(samples, log_probs, phases, boundaries):
    Nx, Ny, length_x, length_y = get_length(samples)
    sysy = torch.zeros((samples.size()[0], length_x, length_y))
    for i in range(length_x):
        for j in range(length_y):
            for d in [[1,0], [0,1]]:
                s1 = flip_neighbor_spins(samples, i, j, d, Nx, Ny)
                log_probs1, phases1 = model.log_probabilities(s1)
                s1 = s1.to(torch.complex64)
                s1[:,i,j][s1[:,i,j] == 1] = -1j
                s1[:,i,j][s1[:,i,j] == 0] = 1j
                s1[:,(i+d[0])%Nx,(j+d[1])%Ny][s1[:,(i+d[0])%Nx,(j+d[1])%Ny] == 1] = -1j
                s1[:,(i+d[0])%Nx,(j+d[1])%Ny][s1[:,(i+d[0])%Nx,(j+d[1])%Ny] == 0] = 1j
                sysy[:,i,j] += torch.real(torch.exp(0.5*(log_probs1-log_probs))*torch.exp(1j*(phases1-phases))*s1[:,i,j]*s1[:,(i+d[0])%Nx,(j+d[0])%Ny])
    return torch.mean(sysy, axis=0)*1/4

def get_sz_(samples):
    # used in the cost function, no averaging here!
    Nx = samples.size()[1]
    Ny = samples.size()[2]
    sz = torch.zeros((samples.size()[0], Nx, Ny))
    s = samples.clone().detach() 
    s[samples == 0] = -1
    sz = s.to(torch.float64)
    return torch.sum(torch.sum(sz, axis=2), axis=1) *1/2 

def get_sz(samples):
    Nx = samples.size()[1]
    Ny = samples.size()[2]
    sz = torch.zeros((samples.size()[0], Nx, Ny))
    s = samples.clone().detach() 
    s[samples == 0] = -1
    sz = s.to(torch.float64)
    return torch.sum(torch.mean(sz, axis=0)*1/2) / (Nx*Ny)

def get_sx(samples, log_probs, phases):
    Nx = samples.size()[1]
    Ny = samples.size()[2]
    sx = torch.zeros((samples.size()[0], Nx, Ny))
    for i in range(Nx):
        for j in range(Ny):
            s1 = flip_spin(samples, i,j)
            log_probs1, phases1 = model.log_probabilities(s1)
            sx[:,i,j] = torch.exp(0.5*(log_probs1-log_probs))*torch.exp(1j*(phases1-phases))
    return torch.sum(torch.mean(sx, axis=0)*1/2) / (Nx*Ny)

def get_sy(samples, log_probs, phases):
    Nx = samples.size()[1]
    Ny = samples.size()[2]
    sy = torch.zeros((samples.size()[0], Nx, Ny))
    for i in range(Nx):
        for j in range(Ny):
            s1 = flip_spin(samples, i,j)
            log_probs1, phases1 = model.log_probabilities(s1)
            s1 = s1.to(torch.complex64)
            s1[:,i,j][s1[:,i,j] == 1] = -1j
            s1[:,i,j][s1[:,i,j] == 0] = 1j
            sy[:,i,j] = torch.exp(0.5*(log_probs1-log_probs))*torch.exp(1j*(phases1-phases))*s1[:,i,j]
    return torch.sum(torch.mean(sy, axis=0)*1/2) / (Nx*Ny)


def flip_neighbor_spins(samples, i,j, direction, Nx, Ny):
    s = samples.clone().detach()
    N = s.size()[1]
    s[:,i,j][samples[:,i,j] == 0]   = 1
    s[:,i,j][samples[:,i,j] == 1]   = 0
    s[:,(i+direction[0])%Nx,(j+direction[1])%Ny][samples[:,(i+direction[0])%Nx,(j+direction[1])%Ny] == 0] = 1
    s[:,(i+direction[0])%Nx,(j+direction[1])%Ny][samples[:,(i+direction[0])%Nx,(j+direction[1])%Ny] == 1] = 0
    return s

def flip_spin(samples, i,j):
    s = samples.clone().detach()
    s[:,i,j][samples[:,i,j] == 0] = 1
    s[:,i,j][samples[:,i,j] == 1] = 0
    return s

In [93]:
n_samples = 200
Elocs = []
for epoch in range(1, n_epochs + 1):
    samples, log_probs, phases = model.sample(n_samples)
    optimizer.zero_grad() # Clears existing gradients from previous epoch
    
    Eloc, cost = cost_fct(samples, model, Jp, Jz, log_probs, phases, bounds, sz_tot=None)
    cost.backward(retain_graph=True) # Does backpropagation and calculates gradients
    optimizer.step() # Updates the weights accordingly
    optimizer.zero_grad()
    sx = get_sx(samples, log_probs, phases)
    sy = get_sy(samples, log_probs, phases)
    sz = get_sz(samples)
    Elocs = (Eloc).mean(axis=0)
    if epoch%10 == 0 or epoch == 1:
        print('Epoch: {}/{}.............'.format(epoch, n_epochs), end=' ')
        print("Loss: {:.8f}".format(cost)+", mean(E): {:.8f}".format((Eloc).mean(axis=0))+", var(E): {:.8f}".format((Eloc).var(axis=0))+", Sx: {:.4f}".format(sx)+", Sy: {:.4f}".format(sy)+", Sz: {:.4f}".format(sz))

Epoch: 1/3000............. Loss: -0.02638908, mean(E): 4.42814159-0.02440443j, var(E): 0.48367098, Sx: 0.2016, Sy: -0.0412, Sz: 0.0706
Epoch: 10/3000............. Loss: -2.48190373, mean(E): -1.60675430+0.01123549j, var(E): 4.36623240, Sx: -0.0215, Sy: 0.0085, Sz: -0.0216
Epoch: 20/3000............. Loss: -2.15646090, mean(E): -4.88775921-0.02380471j, var(E): 2.98553753, Sx: 0.0054, Sy: 0.0126, Sz: 0.0069
Epoch: 30/3000............. Loss: -0.98382990, mean(E): -5.43676949+0.01438079j, var(E): 0.56575602, Sx: 0.0118, Sy: 0.0281, Sz: 0.0000
Epoch: 40/3000............. Loss: -0.28635922, mean(E): -5.80788994+0.00139284j, var(E): 0.35168815, Sx: 0.0004, Sy: -0.0001, Sz: -0.0059
Epoch: 50/3000............. Loss: 0.20062722, mean(E): -5.83531189-0.00301035j, var(E): 0.28871056, Sx: -0.0024, Sy: -0.0012, Sz: -0.0044
Epoch: 60/3000............. Loss: 0.01855647, mean(E): -5.86335611-0.00344591j, var(E): 0.28627756, Sx: -0.0009, Sy: -0.0034, Sz: 0.0028
Epoch: 70/3000............. Loss: 0.193356

KeyboardInterrupt: 

In [94]:
samples, log_probs, phases = model.sample(10000)
print(torch.reshape(samples, (10000,Nx, Ny))[:10])
#print(log_probs[:50])
#print(torch.exp(0.5*log_probs)[:50]*torch.exp(1j*phases)[:50])
print("max")
print(samples[np.argmax(torch.exp(0.5*log_probs).detach().numpy())])
print(max(torch.exp(0.5*log_probs).detach().numpy()))
print("min")
print(samples[np.argmin(torch.exp(0.5*log_probs).detach().numpy())])
print(min(torch.exp(0.5*log_probs).detach().numpy()))


tensor([[[1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1]],

        [[0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0]],

        [[0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0]],

        [[0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0]],

        [[0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0]],

        [[0, 0, 1, 0],
         [1, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1]],

        [[0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0]],

        [[1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1]],

        [[0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0]],

        [[1, 0, 1, 0],
         [0, 1, 0, 1],
         [1, 0, 1, 0],
         [0, 1, 0, 1]]])
max
tensor([[1, 0, 1, 0],
        [0, 1, 0, 1],
        [1, 

### 3. Evaluate Observables

This is what we can test:
- For the Heisenberg model:The average magnetization in all directions should vanish.
- The correlator in all directions should be:

In [95]:
# calculate the nearest neighbor spin correlators
samples, log_probs, phases = model.sample(1000)
szsz = get_szsz(samples, log_probs, bounds)
print(szsz)
sxsx = get_sxsx(samples, log_probs, phases, bounds)
print(sxsx)
sysy = get_sysy(samples, log_probs, phases, bounds)
print(sysy)

tensor([[-0.4350, -0.5000, -0.5000],
        [-0.3960, -0.5000, -0.5000],
        [-0.4480, -0.5000, -0.5000]])
tensor([[-1.3036e-01,  2.3703e-19,  5.0937e-35],
        [-1.0723e-01,  4.3039e-06,  3.6025e-14],
        [-3.0751e-04, -1.3146e-08,  0.0000e+00]], grad_fn=<DivBackward0>)
tensor([[-1.9814e-04, -2.3703e-19,  0.0000e+00],
        [ 9.7889e-04,  1.3668e-25, -1.4199e-14],
        [ 7.4662e-05, -1.5608e-07,  0.0000e+00]], grad_fn=<DivBackward0>)


In [89]:
# calculate sx, sy and sz
sz = get_sz(samples)
print(sz)
sx = get_sx(samples, log_probs, phases)
print(sx)
sy = get_sy(samples, log_probs, phases)
print(sy)

tensor(-0.0008, dtype=torch.float64)
tensor(-0.0034, grad_fn=<DivBackward0>)
tensor(0.0033, grad_fn=<DivBackward0>)


In [90]:
def save(model, boundaries, folder):
    torch.save(model.state_dict(), folder+"model_params.pt")
    # calculate the nearest neighbor spin correlators
    samples, log_probs, phases = model.sample(1000)
    szsz = get_szsz(samples, log_probs, boundaries).detach().numpy()
    np.save(folder+"szsz.npy", szsz)
    sxsx = get_sxsx(samples, log_probs, phases, boundaries).detach().numpy()
    np.save(folder+"sxsx.npy", sxsx)
    sysy = get_sysy(samples, log_probs, phases, boundaries).detach().numpy()
    np.save(folder+"sysy.npy", sysy)

save(model, bounds, "with_total_sz=0/Delta=0/")

FileNotFoundError: [Errno 2] No such file or directory: 'with_total_sz=0/Delta=0/model_params.pt'

In [None]:
# the model can then be load again by using
#model = Model(input_size=2, system_size=systemsize, hidden_dim=hiddendim, n_layers=1)
#model.load_state_dict(torch.load("model_params.pt"))