In [1]:
import numpy as np
import torch
import torch.nn as nn
import torch.nn.functional as F

from soft import discrete_sample

Need to write differentiable approximations for the following operations: logical_and, indicator, max

# Indicator

In [21]:
def compare(a, b, hardness=0.85):
    '''Straight-through indicator of ``a > b``.

    Forward pass: 1.0 where ``a > b`` and 0.0 otherwise (ties give 0.0).
    Backward pass: the gradient of ``sigmoid(hardness * (a - b))`` is used in
    place of the zero gradient of the hard comparison.

    Args:
        a: left-hand tensor.
        b: right-hand tensor (broadcast against ``a`` by the tensor ops).
        hardness: slope of the sigmoid surrogate; larger values make the
            surrogate closer to a step function.

    Returns:
        Tensor holding the hard 0/1 comparison, with the surrogate's gradient
        attached to ``a`` and ``b``.
    '''
    soft = torch.sigmoid(hardness * (a - b))
    hard = (a > b).float()
    # Cancel the surrogate against its detached copy *before* adding the hard
    # value: (soft - soft.detach()) is exactly zero, whereas
    # (hard + soft) - soft.detach() can round to 0.99999994 instead of 1.0.
    return hard + (soft - soft.detach())

def compare_hard(epsilon_t, nu_t):
    """Non-differentiable comparison: 1.0 where epsilon_t > nu_t, else 0.0."""
    is_greater = epsilon_t > nu_t
    return is_greater.float()

# Demo: 3.0 > 5.0 is false, so the forward value of `x` is 0, yet both inputs
# still receive a gradient through the sigmoid surrogate used inside compare().
a = torch.tensor(3.0, requires_grad=True)
b = torch.tensor(5.0, requires_grad=True)

x = compare(a, b)
x.backward()  # x is a scalar, so no explicit gradient argument is needed
print(a.grad, b.grad)

tensor(0.1110) tensor(-0.1110)


## Maximum

In [37]:
def soft_maximum(a, b):
    """Element-wise maximum of ``a`` and ``b`` with a surrogate gradient.

    The selection gate is ``compare(a, b)`` (1 where ``a > b``, else 0) and
    ``b`` is weighted by the complementary gate ``1 - compare(a, b)``.
    Using the complement instead of a second ``compare(b, a)`` call makes the
    two gates always sum to one, so ties (``a == b``) return the shared value
    rather than 0. Because ``sigmoid(-x) == 1 - sigmoid(x)``, the gradients
    are the same as with two separate comparisons.

    Args:
        a: first tensor.
        b: second tensor.

    Returns:
        Tensor equal to ``a`` where ``a > b`` and to ``b`` elsewhere.
    """
    a_wins = compare(a, b)
    return a * a_wins + b * (1 - a_wins)

# Demo: compare torch.max with soft_maximum on a single pair of values.
value1 = torch.tensor([10.0], requires_grad=True)
value2 = torch.tensor([5.0], requires_grad=True)

hard_max = torch.max(value1, value2)
soft_max = soft_maximum(value1, value2)

print(hard_max, soft_max)
# Only soft_max is back-propagated, so the printed gradients are those of the
# soft version (the losing input value2 also ends up with a gradient).
soft_max.backward()
print(value1.grad, value2.grad)

tensor([10.], grad_fn=<MaximumBackward0>) tensor([10.], grad_fn=<AddBackward0>)
tensor([1.0589]) tensor([-0.0589])


## logical_and, logical_or, logical_not

In [100]:
def logical_not(a, grad=True):
    """Logical NOT with an optional straight-through gradient.

    The hard value is ``torch.logical_not`` applied to ``a`` after an integer
    cast with ``.long()``. With ``grad=True`` the gradient of the surrogate
    ``1 - a`` is attached to that hard value.

    Args:
        a: input tensor (0/1 valued in the notebook's examples).
        grad: when False, return only the hard result (a bool tensor).

    Returns:
        ``grad=False``: bool tensor from ``torch.logical_not``.
        ``grad=True``: the same values, promoted by the addition with the
        surrogate term and carrying the gradient of ``1 - a``.
    """
    hard = torch.logical_not(a.long())
    if not grad:
        return hard

    soft = 1 - a
    # Cancel the surrogate against its detached copy first so the forward
    # value stays exactly 0/1; (hard + soft) - soft.detach() can pick up
    # rounding error for fractional inputs. Same grouping as logical_or and
    # logical_and.
    return hard + (soft - soft.detach())


def logical_or(a, b, grad=True):
    """Logical OR with an optional straight-through gradient.

    Forward value: ``torch.logical_or`` on the ``.long()`` casts of the inputs.
    Backward (``grad=True``): gradient of the surrogate ``a + b``.
    NOTE(review): the surrogate ``a + b`` gives a constant gradient of 1 to
    both inputs; confirm this is preferred over ``a + b - a * b``.
    """
    if grad:
        surrogate = a + b
        # Exactly zero in value, but carries the surrogate's gradient.
        zero_with_grad = surrogate - surrogate.detach()
        return torch.logical_or(a.long(), b.long()) + zero_with_grad

    return torch.logical_or(a.long(), b.long())

def logical_and(a, b, grad=True):
    """Logical AND with an optional straight-through gradient.

    Forward value: ``torch.logical_and`` on the ``.long()`` casts of the inputs.
    Backward (``grad=True``): gradient of the surrogate product ``a * b``.
    """
    if grad:
        surrogate = a * b
        # Exactly zero in value, but carries the surrogate's gradient.
        zero_with_grad = surrogate - surrogate.detach()
        return torch.logical_and(a.long(), b.long()) + zero_with_grad

    return torch.logical_and(a.long(), b.long())

## logical_or

In [101]:
# Demo: straight-through logical_or versus torch.logical_or on 0/1 inputs.
x_a = torch.tensor([1.0, 0.0, 0.0, 1.0, 0.0], requires_grad=True)
x_b = torch.tensor([1.0, 0.0, 1.0, 0.0, 0.0], requires_grad=True)

z = logical_or(x_a, x_b)
z.sum().backward()

# Reference result from torch (no gradient path).
z2 = torch.logical_or(x_a, x_b).long()

# The differentiable version must agree with the reference in the forward pass.
assert torch.equal(z.detach(), z2.to(z.dtype))

print("Output: ", z, z2)
print("Gradient: ", x_a.grad, x_b.grad)

Output:  tensor([1., 0., 1., 1., 0.], grad_fn=<AddBackward0>) tensor([1, 0, 1, 1, 0])
Gradient:  tensor([1., 1., 1., 1., 1.]) tensor([1., 1., 1., 1., 1.])


## logical_and

In [102]:
# Demo: straight-through logical_and versus torch.logical_and on 0/1 inputs.
x_a = torch.tensor([1.0, 0.0, 0.0, 1.0, 0.0], requires_grad=True)
x_b = torch.tensor([1.0, 0.0, 1.0, 0.0, 0.0], requires_grad=True)

z_basic = x_a * x_b  # plain product (the soft surrogate on its own); not used further in this cell
z_at = logical_and(x_a, x_b)
z_torch = torch.logical_and(x_a, x_b).long()  # reference result, no gradient path
print("Output: ", z_at, z_torch)

# Gradient of the product surrogate: d/dx_a = x_b and d/dx_b = x_a.
z_at.sum().backward()
print("Gradient: ", x_a.grad, x_b.grad)

Output:  tensor([1., 0., 0., 0., 0.], grad_fn=<AddBackward0>) tensor([1, 0, 0, 0, 0])
Gradient:  tensor([1., 0., 1., 0., 0.]) tensor([1., 0., 0., 1., 0.])


## logical_not

In [105]:
# Demo: straight-through logical_not versus torch.logical_not on 0/1 inputs.
x_a = torch.tensor([1.0, 0.0, 0.0, 1.0, 0.0], requires_grad=True)

z_at = logical_not(x_a)
z_torch = torch.logical_not(x_a).long()  # reference result, no gradient path

print("Output: ", z_at, z_torch)
# Gradient of the surrogate 1 - a is -1 for every element.
z_at.sum().backward()
print("Gradient: ", x_a.grad)

Output:  tensor([0., 1., 1., 0., 1.], grad_fn=<SubBackward0>) tensor([0, 1, 1, 0, 1])
Gradient:  tensor([-1., -1., -1., -1., -1.])


## discrete_sampling

In [12]:
# NOTE(review): discrete_sample is imported from the local `soft` module and is
# not shown here; presumably it draws 0/1 samples with the given probability
# while keeping a gradient path back to it -- confirm against soft.py.
# No random seed is set in the visible cells, so the sampled values (and the
# resulting gradient) are expected to vary between runs.
initial_infections_percentage = torch.tensor(0.6, requires_grad=True)

agents_stages = discrete_sample(initial_infections_percentage, size=(10, 1))
print(agents_stages)
agents_stages.sum().backward()
print(initial_infections_percentage.grad)

tensor([1., 1., 0., 1., 1., 1., 0., 0., 1., 0.], grad_fn=<SelectBackward0>)
tensor(4.7663)


In [2]:
# composite operations: chain discrete_sample with logical_and and check that a
# gradient still reaches the sampling probability.
initial_infections_percentage = torch.tensor(0.6, requires_grad=True)
agents_stages = discrete_sample(initial_infections_percentage, size=(10, 1))

# Fixed 0/1 mask without requires_grad; only agents_stages carries gradient.
mask = torch.tensor([1.0, 0.0, 0.0, 0.0, 0.0, 1.0, 0.0, 1.0, 0.0, 1.0])

out = logical_and(agents_stages, mask)
print(out)
out.sum().backward()
print(initial_infections_percentage.grad)

tensor([1., 0., 0., 0., 0., 1., 0., 1., 0., 0.], grad_fn=<AddBackward0>)
tensor(6.8792)


## discrete_event representation with sparse tensors

In [56]:
# StimulusPayments: irregular events implemented at discrete intervals

num_agents = 20
num_days = (100, 1) # size of the sparse payment tensor: 100 rows (days) x 1 column
indices = [[23, 47], [0, 0]] # COO indices: first list = row (day 23 and day 47), second list = column 0
values = torch.tensor([600.0, 300.0]) # stimulus amount

current_assets = torch.zeros((num_agents, 1))

stimulus_payments = torch.sparse_coo_tensor(indices=indices, values=values, size=num_days)

eligible_prob = torch.tensor([0.3], requires_grad=True)
# NOTE(review): the exact output shape of discrete_sample is defined in the
# `soft` module; the unsqueeze here is what yields the shape printed below.
agent_eligibility = discrete_sample(sample_prob=eligible_prob, size=(num_agents, 1)).unsqueeze(dim=1)

print("Eligibility Mask: ", agent_eligibility.shape, stimulus_payments.shape)

# Only days 0..39 are simulated, so of the two payments only the one on day 23
# is reached; the day-47 payment lies outside this range.
for t in range(40):
    #current_assets = torch.sparse.addmm(current_assets, agent_eligibility, stimulus_payments[t])
    current_assets = current_assets + agent_eligibility*stimulus_payments[t].to_dense()

# Back-propagate the total assets to the eligibility probability.
ans = current_assets.sum()
ans.backward()

print(eligible_prob.grad)

Eligibility Mask:  torch.Size([20, 1]) torch.Size([100, 1])
tensor([20243.9863])


## Tasks to do:
1. Add sparse and dense tensors
2. Multiply sparse and dense tensors
3. Check backpropagation with sparse_coo_tensor vs sparse_csr_tensor

## Add sparse and dense tensor

In [31]:
# Shared fixtures for the sparse/dense experiments in the cells that follow.
tensor_size = (100, 1)
sparse_indices = [[23, 47], [0, 0]]  # two non-zero entries: (23, 0) and (47, 0)
sparse_values = torch.tensor([600., 300.])
eligible_prob = torch.tensor([0.3], requires_grad=True)

sparse_tensor = torch.sparse_coo_tensor(indices=sparse_indices, values=sparse_values, size=tensor_size)
dense_tensor = torch.zeros(sparse_tensor.shape)  # all-zero dense tensor of the same shape
# NOTE(review): discrete_sample lives in the `soft` module; its output shape
# before the unsqueeze is not visible here -- confirm against soft.py.
eligibility_mask = discrete_sample(sample_prob=eligible_prob, size=tensor_size).unsqueeze(dim=1)

In [21]:
# Dense + sparse addition; the sum is converted back to sparse layout only to
# make the non-zero entries easy to read in the printout.
add_tensors = dense_tensor + sparse_tensor
print(add_tensors.to_sparse(sparse_dim=2))

tensor(indices=tensor([[23, 47],
                       [ 0,  0]]),
       values=tensor([600., 300.]),
       size=(100, 1), nnz=2, layout=torch.sparse_coo)


## Multiply sparse and dense tensors

In [27]:
# Matrix product of the dense tensor with the transposed sparse tensor; only
# the resulting shape is inspected here.
multiply_tensors = torch.sparse.mm(dense_tensor, sparse_tensor.t())

print(multiply_tensors.shape)

torch.Size([100, 100])


## Add and Multiply - sparse and dense tensors

In [33]:
print(eligibility_mask.shape, dense_tensor.shape, sparse_tensor.shape)

# addmm: dense_tensor + sparse_tensor @ eligibility_mask.t(). This is a matrix
# product (an outer product of the two column tensors), not an element-wise one.
new_tensor = torch.sparse.addmm(dense_tensor, sparse_tensor, eligibility_mask.t())
print(new_tensor.shape)

torch.Size([100, 1]) torch.Size([100, 1]) torch.Size([100, 1])
torch.Size([100, 100])


In [39]:
# sparse-dense elementwise multiplication: the sparse payments are masked by
# the sampled eligibility; the result keeps the sparse layout.
val2 = sparse_tensor * eligibility_mask
print(val2)

tensor(indices=tensor([[23, 47],
                       [ 0,  0]]),
       values=tensor([600.,   0.]),
       size=(100, 1), nnz=2, layout=torch.sparse_coo, grad_fn=<MulBackward0>)


In [45]:
# sparse-dense elementwise addition: add the masked sparse payments to the
# dense tensor, then display the sum in sparse layout (last expression of the cell).
val3 = dense_tensor + sparse_tensor * eligibility_mask
val3.to_sparse()

tensor(indices=tensor([[23],
                       [ 0]]),
       values=tensor([600.]),
       size=(100, 1), nnz=1, layout=torch.sparse_coo,
       grad_fn=<ToSparseBackward0>)