<a href="https://colab.research.google.com/github/HemaGarima/Machine-Learning/blob/master/Basic_Attention.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

## Basic attention operation

In [1]:
import numpy as np

In [2]:
def softmax(x , axis = 0):
  """ Calculate softmax function for an array x along specified axis

    The maximum along `axis` is subtracted before exponentiating. This does not
    change the mathematical result, but it keeps np.exp from overflowing to inf
    (which would turn the output into nan) when x contains large values.

    Arguments:
    x: array-like of scores
    axis: axis along which the outputs sum to 1
        axis = 0 calculates softmax across rows which means each column sums to 1
        axis = 1 calculates softmax across columns which means each row sums to 1

    Returns:
    numpy array with the same shape as x
  """
  x = np.asarray(x)

  # np.max has no identity for empty input; there is nothing to normalise anyway
  if x.size == 0:
    return np.exp(x)

  # Shift so that the largest entry along the axis is 0 before exponentiating
  shifted = x - np.max(x , axis = axis , keepdims = True)
  exps = np.exp(shifted)

  # keepdims makes the sums broadcast back against exps
  return exps / np.sum(exps , axis = axis , keepdims = True)

## 1. Calculating alignment scores

In [3]:
# Sizes of the toy problem
hidden_size = 16
attention_size = 10
input_length = 5

# Fix the seed so the synthetic data below is identical on every run
np.random.seed(42)

# Synthetic vectors used for testing
encoder_states = np.random.standard_normal(size = (input_length , hidden_size))
decoder_state = np.random.standard_normal(size = (1 , hidden_size))

# Weights for the neural network , these are typically learned through training
# Use these in the alignment function below as the layer weights
layer_1 = np.random.standard_normal(size = (2*hidden_size , attention_size))
layer_2 = np.random.standard_normal(size = (attention_size , 1))

# Implement this function. Replace None with your code. Solution at the bottom of the notebook
def alignment(encoder_states , decoder_state):
  """ Calculate one alignment score per encoder state

      Each encoder state is concatenated with the decoder state and passed through
      a small network: a tanh layer using the notebook-level weights layer_1 ,
      followed by a linear layer using the notebook-level weights layer_2.

      Arguments:
      encoder_states: NxM numpy array, where N is the number of states and M is the state length
      decoder_state: 1xM numpy array, the single decoder state every encoder state is scored against

      Returns:
      Nx1 numpy array holding the score of each encoder state
  """
  # Take the number of states from the argument rather than from the notebook-level
  # input_length , so the function works for any number of encoder states
  num_states = len(encoder_states)

  # First , concatenate the encoder states and the decoder state
  # (the decoder state is repeated so that it lines up with every encoder state)
  inputs = np.concatenate([encoder_states , np.repeat(decoder_state , num_states , axis=0)], axis = 1)
  assert inputs.shape == (num_states , layer_1.shape[0])

  # Matrix multiplication of the concatenated inputs and layer_1 , with tanh activation
  activations = np.tanh(np.matmul(inputs , layer_1))
  assert activations.shape == (num_states , layer_2.shape[0])

  # Matrix multiplication of the activations with layer_2. No tanh here , the raw scores are returned
  scores = np.matmul(activations , layer_2)
  assert scores.shape == (num_states , 1)

  return scores

In [4]:
# Sanity check: score the synthetic encoder states against the decoder state
scores = alignment(encoder_states = encoder_states , decoder_state = decoder_state)
print(scores)

[[4.35790943]
 [5.92373433]
 [4.18673175]
 [2.11437202]
 [0.95767155]]


## 2. Turning alignment into weights

In [5]:
# The softmax function is already implemented at the top of the notebook (see softmax above)

## 3. Weight the encoder output vectors and sum

In [6]:
def attention(encoder_states , decoder_state):
  """ Example function that calculates attention, returns the context vector

        Arguments:
        encoder_states: NxM numpy array, where N is the number of vectors and M is the vector length
        decoder_state: 1xM numpy array, M is the vector length, must be the same M as encoder_states

        Returns:
        numpy array of length M, the weighted sum of the encoder states
  """

  # First , calculate the alignment scores (one per encoder state)
  scores = alignment(encoder_states , decoder_state)

  # Then take the softmax over the encoder states to get a weight distribution
  weights = softmax(scores , axis = 0)

  # Multiply each encoder state by its respective weight and sum over the states.
  # The weights have one column , so they broadcast across the M entries of each state
  context = np.sum(weights * encoder_states , axis = 0)
  return context

# Compute and show the context vector for the synthetic inputs
context_vector = attention(encoder_states = encoder_states , decoder_state = decoder_state)
print(context_vector)

[-0.63514569  0.04917298 -0.43930867 -0.9268003   1.01903919 -0.43181409
  0.13365099 -0.84746874 -0.37572203  0.18279832 -0.90452701  0.17872958
 -0.58015282 -0.58294027 -0.75457577  1.32985756]
