practice for https://ita9naiwa.github.io/numeric%20calculation/2018/11/10/Einsum.html

## Transpose

In [1]:
import numpy as np

A = np.array([[1,2,3], [4,5,6]])
R = np.einsum("ij->ji", A)
print(R)

[[1 4]
 [2 5]
 [3 6]]


## Diagonal, Trace

In [2]:
import numpy as np

A = np.eye(10)
diag = np.einsum('ii->i', A)
trace =np.einsum('ii->', A)
print(diag)
print(trace)

[1. 1. 1. 1. 1. 1. 1. 1. 1. 1.]
10.0


## Matrix sum to scalar

In [3]:
import numpy as np

A = np.array([[1,2,3], [4,5,6]])
R = np.einsum("ij->", A)
print(R)

21


## matrix column or row sum (to vector)

In [5]:
import numpy as np

A = np.array([[1,2,3], [4,5,6]])
print(A)
row_sum = np.einsum("ij->i", A)
col_sum =np.einsum("ij->j", A)
print(row_sum)
print(col_sum)

[[1 2 3]
 [4 5 6]]
[ 6 15]
[5 7 9]


## Dot Product, Outer product of two vectors

In [8]:
import numpy as np

x = np.array([-1, -10, -100])
y = np.array([1, 10, 100])
dot = np.einsum('i,i->', x, y )
outer = np.einsum('i,j->ij', x,y)
print(dot)
print(outer)

-10101
[[    -1    -10   -100]
 [   -10   -100  -1000]
 [  -100  -1000 -10000]]


## Hadamard(element-wise) product of vector or matrix

In [10]:
import numpy as np

x = np.array([-1, -10, -100])
y = np.array([1, 10, 100])
elemwise_vec = np.einsum('i,i->i', x, y)
print(elemwise_vec)
A = np.arange(6).reshape((2, 3))
B = np.arange(6).reshape((2, 3))
elemwise_mat = np.einsum('ij,ij->ij', A, B)
print(elemwise_mat)

[    -1   -100 -10000]
[[ 0  1  4]
 [ 9 16 25]]


## Matrix-Vector multiplication

In [11]:
import numpy as np

A = np.array([[1,2,3], [4,5,6]])
x = np.array([-1, -10, -100])
b = np.einsum('ij,j->i', A, x)
print(b)

[-321 -654]


## Matrix-Matrix Multiplication and Batched Matrix multiplication

In [14]:
import numpy as np

## Matrix-Matrix Multiplication
A = np.array([[1,2,3], [4,5,6]])
B = A.transpose()
R = np.einsum('ik,kj->ij', A, B)
print(R)

[[14 32]
 [32 77]]


In [17]:
## Batched Matrix Multiplication
A = np.random.random(size=(3,10,4))
B = np.random.random(size=(3,4, 8))
R = np.einsum('bik,bkj->bij',A, B)
print(R.shape)

(3, 10, 8)


## Quadritc Form, or Matrix norm, or Distance with respect to Matrix(Mahalanobis distance)

In [19]:
import numpy as np

x = np.array([1,2,3])
y = np.array([-1,-2,-3])
A = np.random.random(size=(3, 3))

r = np.einsum('i,ij,j->', x, A, y)
print(r)

-18.328254622191892


## Examples 1: Linear Regression with Einsum

In [20]:
import math
import numpy as np

import torch
import torch.nn as nn
import torch.optim as optim
import torch.autograd as autograd
import torch.nn.functional as F
from torch.autograd import Variable
from torch.utils.data import Dataset, DataLoader

#### Examples 1: Linear Regression with Einsum


In [21]:
class LinearModule(nn.Module):
    def __init__(self, idim, odim):
        super(LinearModule, self).__init__()
        self.W = nn.Parameter(torch.Tensor(idim, odim))
        self.W.data.uniform_(-(1./ math.sqrt(idim+odim)), 1. / math.sqrt(idim+odim))
        self.b = nn.Parameter(torch.Tensor(np.random.normal(scale=0.001, size=(odim, ))))

    def forward(self, x):
        """
        Args:
            x: [batch_size x dim]
        """
        p = torch.einsum("bt,to->bo", [x, self.W])
        p += self.b
        q = p.squeeze(1)
        return p, q

    def loss(self, x, y):
        _, pred_y = self.forward(x)
        diff = (pred_y - y) / y.size(0)
        return torch.einsum("x,x->", [diff, diff])

#### Examples 2: Pointer Attention mechanism With Einsum

In [22]:
class ptr_att(nn.Module):
    def __init__(self, hidden_size, name='PointerAttention', use_cuda=False):
        super(ptr_att, self).__init__()
        self.W_enc = nn.Parameter(torch.FloatTensor(hidden_size, hidden_size))
        self.W_ref = nn.Parameter(torch.FloatTensor(hidden_size, hidden_size))
        self.V = nn.Parameter(torch.FloatTensor(hidden_size))

        self.V.data.uniform_(-(1. / math.sqrt(hidden_size)) , 1. / math.sqrt(hidden_size))
        self.W_enc.data.uniform_(-(1. / math.sqrt(hidden_size)) , 1. / math.sqrt(hidden_size))
        self.W_ref.data.uniform_(-(1. / math.sqrt(hidden_size)) , 1. / math.sqrt(hidden_size))

    def forward(self, enc, ref):
        batch_size = enc.size(0)
        seq_len = enc.size(1)
        """
        Args:
            enc: [batch_size x seq_len x hidden_size] (actually, seq_len is different by each)
            ref: [batch_size x hidden_size]
        """
        Wenc = torch.einsum("ak,bjk->bja", [self.W_enc, enc])
        Wref = torch.einsum("ak,bk->ba", [self.W_ref, ref]).unsqueeze(1).repeat(1,seq_len,1)
        # [batch_size x seq_len x hidden_size] reference vector multiplied by w_enc
        W = torch.einsum("k,ijk->ij", [self.V, F.tanh(Wenc + Wref)])
        # [batch_size x seq_len],
        #return W
        return W

#### Example 3: Attention Mechanism brought from here
https://rockt.github.io/2018/04/30/einsum

In [23]:
# Parameters
# -- [hidden_dimension]
bM, br, w = random_tensors([7], num=3, requires_grad=True)
# -- [hidden_dimension x hidden_dimension]
WY, Wh, Wr, Wt = random_tensors([7, 7], num=4, requires_grad=True)

# Single application of attention mechanism
def attention(Y, ht, rt1):
  # -- [batch_size x hidden_dimension]
  tmp = torch.einsum("ik,kl->il", [ht, Wh]) + torch.einsum("ik,kl->il", [rt1, Wr])
  Mt = F.tanh(torch.einsum("ijk,kl->ijl", [Y, WY]) + tmp.unsqueeze(1).expand_as(Y) + bM)
  # -- [batch_size x sequence_length]
  at = F.softmax(torch.einsum("ijk,k->ij", [Mt, w]))
  # -- [batch_size x hidden_dimension]
  rt = torch.einsum("ijk,ij->ik", [Y, at]) + F.tanh(torch.einsum("ij,jk->ik", [rt1, Wt]) + br)
  # -- [batch_size x hidden_dimension], [batch_size x sequence_dimension]
  return rt, at

# Sampled dummy inputs
# -- [batch_size x sequence_length x hidden_dimension]
Y = random_tensors([3, 5, 7])
# -- [batch_size x hidden_dimension]
ht, rt1 = random_tensors([3, 7], num=2)

rt, at = attention(Y, ht, rt1)
at  # -- print attention weights


NameError: name 'random_tensors' is not defined