import modules

In [11]:
from functools import partial
import torch as t
from functorch import vjp

In [2]:
def test(a,b,c):
    return a * b + c

In [7]:
a = t.tensor(2.)
b = t.tensor(4.)

In [8]:
test_fn = partial(test, a, b)

In [10]:
test_fn(1)

tensor(9.)

In [12]:
vjp(test_fn, t.tensor(1.))

(tensor(9.),
 <function functorch._src.eager_transforms._vjp_with_argnums.<locals>.wrapper(cotangents, retain_graph=True, create_graph=None)>)

# Forward AD testing

In [18]:
import torch as t
import torch.autograd.forward_ad as fwAD

In [41]:
def test_fn(w, b, x):
    print(f"w: {w}, x: {x}, b: {b}")
    return w*x+b

In [42]:
w = t.tensor(2.)
primal = w.clone().requires_grad_()
bias = t.tensor(1.)
tangent = t.tensor(1.)

In [44]:
with fwAD.dual_level():
    dual_input = fwAD.make_dual(primal, tangent)
    # Tensors that do not not have an associated tangent are automatically
    # considered to have a zero-filled tangent of the same shape.
    dual_output = test_fn(dual_input, bias, t.tensor(10.))
    fn1_results = fwAD.unpack_dual(dual_output)
    print(fn1_results)
    dual_output2 = test_fn(dual_input, bias, dual_output)
    fn2_results = fwAD.unpack_dual(dual_output2)
    print(fn2_results)

w: 2.0, x: 10.0, b: 1.0
UnpackedDualTensor(primal=tensor(21., grad_fn=<Identity>), tangent=tensor(10., grad_fn=<AddBackward0>))
w: 2.0, x: 21.0, b: 1.0
UnpackedDualTensor(primal=tensor(43., grad_fn=<Identity>), tangent=tensor(41., grad_fn=<AddBackward0>))


In [40]:
w.requires_grad_()
a = test_fn(w, bias, t.tensor(10.))
b = test_fn(w, bias, a)
print(f'func output: {b}')
t.autograd.grad(b, w)

tensor(2., requires_grad=True) tensor(10.) tensor(1.)
tensor(2., requires_grad=True) tensor(21., grad_fn=<AddBackward0>) tensor(1.)
func output: 43.0


(tensor(41.),)

# experimenting with Reaction function

import modules

In [82]:
from torch.distributions import uniform, normal
from reaction import rho
from tqdm import tqdm
import numpy as np

In [54]:
def chop_and_shuffle_data(sequence, shuffle=True):
    """
    Chop the training data into a set of state transitions and shuffle the resulting set.

    sequences (np.ndarray): matrix of shape (n_sequences, steps_per_seq, grid_height, grid_width)
    """
    steps_per_seq, _, grid_height, grid_width = sequence.shape
    # each transition consists of 2 states
    indexer = np.arange(2)[None, :] + np.arange(steps_per_seq - 1)[:, None]
    chopped_set = np.zeros(
        [(steps_per_seq - 1), 2, 2, grid_height, grid_width]
    )
    chopped_set = sequence.detach().numpy()[indexer]
    if shuffle:
        np.random.shuffle(chopped_set)
    return t.tensor(chopped_set)

define probability functions

In [49]:
def p1(cells: t.Tensor, N: int, gamma: float, k1: float) -> t.Tensor:
    n = cells[0]
    k1_bar = k1 / ((N - 1) * (N - 2))
    return gamma * k1_bar * n * (n - 1) * (N - n)


def p2(cells: t.Tensor, N: int, gamma: float, k1_star: float) -> t.Tensor:
    n = cells[0]
    k1_star_bar = k1_star / ((N - 1) * (N - 2))
    return gamma * k1_star_bar * n * (N - n) * (N - 1 - n)


def p3(cells: t.Tensor, N: int, gamma: float, k2: float) -> t.Tensor:
    n = cells[0]
    m = cells[1]
    k2_bar = k2 / N
    return gamma * k2_bar * (N - n) * m


def p4(cells: t.Tensor, N: int, gamma: float, k2_star: float) -> t.Tensor:
    n = cells[0]
    m = cells[1]
    k2_star_bar = k2_star / N
    return gamma * k2_star_bar * n * (N - m)


def p5(cells: t.Tensor, N: int, gamma: float, k3: float) -> t.Tensor:
    n = cells[0]
    m = cells[1]
    k3_bar = k3 / N
    return gamma * k3_bar * (N - n) * (N - m)


def p6(cells: t.Tensor, N: int, gamma: float, k3_star: float) -> t.Tensor:
    n = cells[0]
    m = cells[1]
    k3_star_bar = k3_star / N
    return gamma * k3_star_bar * n * m

In [56]:
grid = t.zeros((2,15,15))
grid[:] = 50
grid[0,12:17] = 90
N = 100 
gamma = 0.005 
rate_coefficients = t.tensor([0.98,0.98,0.1,0.1,0.2,0.2])
probability_funcs = [p1,p2,p3,p4,p5,p6]
num_steps = 1_000
t.sum(grid[0])

tensor(13050.)

In [51]:
grid = grid.float()
sequence = t.zeros((num_steps, *grid.shape))

for i in tqdm(range(num_steps)):
    sequence[i] = grid.detach().clone()
    grid = rho(grid, N, gamma, rate_coefficients, probability_funcs, num_reaction_channels=6)

100%|██████████| 1000/1000 [00:02<00:00, 357.71it/s]


In [55]:
dataset = chop_and_shuffle_data(sequence=sequence)

experiment

In [71]:
X = dataset[:,0]
Y_obs = dataset[:,1]

In [68]:
X.permute(1,0,2,3).shape

torch.Size([2, 999, 15, 15])

In [74]:
print(normal.Normal(0.5,0.1).sample((10,))[0])

tensor(0.4363)


In [99]:
class STEFunction(t.autograd.Function):
    @staticmethod
    def forward(ctx, input):
        return (input > 0).float()

    @staticmethod
    def backward(ctx, grad_output):
        return grad_output

    @staticmethod
    def jvp(ctx, grad_output):
        return grad_output

In [89]:
def MSE(X, Y):
    return t.mean(t.sum((X-Y)**2, dim=((1,2,3))))

In [90]:
def dist(X, D1, D2):
    mse_D1 = MSE(X, D1)
    #print(mse_D1)
    mse_D2 = MSE(X, D2)
    #print(mse_D2)
    return (mse_D1 - mse_D2)**2

In [126]:
rate_coefficients = t.tensor([0.01,0.98,0.1,0.1,0.2,0.2])
primal = rate_coefficients.clone().requires_grad_()
tangents = t.eye(6)
with fwAD.dual_level():
    batch_size, grids_per_el, height, width = X.shape
    channel_matrix = t.randint(high=6, size=(batch_size, height, width))
    for tangent in tangents:
        dual_input = fwAD.make_dual(primal, tangent)

        # move the batch dimension in to match the masking
        Y_sim = X.clone()
        Y_sim = Y_sim.permute(1, 0, 2, 3)

        print()
        


tensor(0.0100, grad_fn=<SelectBackward0>, tangent=1.0)
tensor(0.0100, grad_fn=<SelectBackward0>, tangent=0.0)
tensor(0.0100, grad_fn=<SelectBackward0>, tangent=0.0)
tensor(0.0100, grad_fn=<SelectBackward0>, tangent=0.0)
tensor(0.0100, grad_fn=<SelectBackward0>, tangent=0.0)
tensor(0.0100, grad_fn=<SelectBackward0>, tangent=0.0)


In [127]:
#rate_coefficients = normal.Normal(0.5,0.1).sample((6,))
rate_coefficients = t.tensor([0.01,0.98,0.1,0.1,0.2,0.2])

primal0 = rate_coefficients[0].clone().requires_grad_()
tangent0 = t.tensor(1.)
primal1 = rate_coefficients[1].clone().requires_grad_()
tangent1 = t.tensor(1.)
primal2 = rate_coefficients[2].clone().requires_grad_()
tangent2 = t.tensor(1.)
primal3 = rate_coefficients[3].clone().requires_grad_()
tangent3 = t.tensor(1.)
primal4 = rate_coefficients[4].clone().requires_grad_()
tangent4 = t.tensor(1.)
primal5 = rate_coefficients[5].clone().requires_grad_()
tangent5 = t.tensor(1.)

params = [
    (primal0,tangent0),
    (primal1,tangent1),
    (primal2,tangent2),
    (primal3,tangent3),
    (primal4,tangent4),
    (primal5,tangent5)
]

Y_sim = X.clone()

fwd_jacobian = []

for idx, param in params:
    primal, tangent = param
    with fwAD.dual_level():

        batch_size, grids_per_el, height, width = X.shape
        channel_matrix = t.randint(high=6, size=(batch_size, height, width))

        dual_input = fwAD.make_dual(primal, tangent)

        # move the batch dimension in to match the masking
        Y_sim = Y_sim.permute(1, 0, 2, 3)

        for channel_idx in range(6):
            channel_mask = channel_matrix == channel_idx
            if channel_idx == 0:
                if idx == 0:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, dual_input)
                else:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, rate_coefficients[0])
            if channel_idx == 1:
                if idx == 1:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, dual_input)
                else:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, rate_coefficients[1])
            if channel_idx == 2:
                if idx == 2:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, dual_input)
                else:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, rate_coefficients[2])
            if channel_idx == 3:
                if idx == 0:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, dual_input)
                else:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, rate_coefficients[3])
            if channel_idx == 4:  
                if idx == 0:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, dual_input)
                else:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, rate_coefficients[4])
            if channel_idx == 5:
                if idx == 0:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, dual_input)
                else:
                    reaction_prob = p1(Y_sim[:, channel_mask], N, gamma, rate_coefficients[5])

            # randomly sample a threshold value for each cell to compare the prob. against
            num_cells = Y_sim[:, channel_mask].shape[-1]
            #print(num_cells)
            thresholds = uniform.Uniform(0, 1).sample((num_cells,))

            dual_output2 = STEFunction.apply(dual_output - thresholds)
            #print(dual_output2)

            dual_output3 = Y_sim[:, channel_mask] + dual_output2
            #print(dual_output3)

            Y_sim[:, channel_mask] = dual_output3
            #print(Y_sim[:, channel_mask][0,0].shape)

        Y_sim = Y_sim.permute(1, 0, 2, 3)
        dual_output_4 = MSE(X, Y_sim)
        #print(dual_output_4)

        dual_output5 = dist(X, Y_obs, Y_sim)
        print(dual_output5)


    

        

    

TypeError: iteration over a 0-d tensor