# 11.3. Attention Scoring Functions

In [1]:
# Author: Isaac Reyes
# Libraries:
# NOTE(review): `mx` is used throughout this notebook although the
# `use AI::MXNet 'mx'` line below is commented out - presumably `use d2l`
# makes it available; confirm against the d2l module.
#use Math::Trig;
#use AI::MXNet 'mx';
#use AI::MXNet::Gluon::NN 'nn';
#mx->npx->set_np();
use strict; 
use warnings; 
use Data::Dump qw(dump); 
use d2l; 
IPerl->load_plugin('Chart::Plotly');  # load the Chart::Plotly plugin used for plotting

## 11.3.1. Dot Product Attention

### 11.3.2. Convenience Functions

#### 11.3.2.1. Masked Softmax Operation

In [2]:
sub masked_softmax {
    # Softmax over the last axis of $X, masking out the positions that lie
    # beyond each row's valid length.
    #
    #   $X          - tensor of scores (the 1-D case below reads $X->shape->[1],
    #                 so at least two axes are expected there).
    #   $valid_lens - optional; either 1-D (one length per batch item) or
    #                 higher-rank (flattened to one length per row).
    #
    # Returns a tensor with the same shape as $X.
    my ($X, $valid_lens) = @_; #@save

    # Nothing to mask: plain softmax.
    return mx->nd->softmax($X) unless defined $valid_lens;

    my $orig_shape = $X->shape;

    # Bring the lengths to a flat vector with one entry per row of the
    # flattened score matrix built below.
    my $flat_lens = $valid_lens->ndim == 1
        ? $valid_lens->repeat($orig_shape->[1])
        : $valid_lens->reshape([-1]);

    # Collapse every leading axis so each row is one softmax problem, then
    # overwrite masked entries with a very large negative value, whose
    # exponentiation outputs 0.
    my $rows   = $X->reshape([-1, $orig_shape->[-1]]);
    my $masked = mx->nd->SequenceMask($rows, $flat_lens, 1,
                                      value => -1e6, axis => 1);

    return mx->nd->softmax($masked)->reshape($orig_shape);
}

In [3]:
# Test the function
#my $X_test = mx->np->random->uniform(size => [2, 2, 4]);
#my $valid_lens_test = mx->np->array([2, 3]);
#my $result = masked_softmax($X_test, $valid_lens_test);
#print $result->aspdl;

In [4]:
# 1-D valid_lens: one valid length per batch item (2 for the first item,
# 3 for the second); masked_softmax repeats it for every row of that item.
masked_softmax(mx->nd->random->uniform(shape => [2, 2, 4]), mx->nd->array([2, 3]))->aspdl;
# 2-D valid_lens: one valid length per row; masked_softmax flattens it.
# As the last expression of the cell, this is the value the notebook displays.
masked_softmax(mx->nd->random->uniform(shape => [2, 2, 4]), mx->nd->array([[1, 3], [2, 4]]))->aspdl;


[
 [
  [       1        0        0        0]
  [0.358484 0.365888 0.275628        0]
 ]
 [
  [0.543703 0.456297        0        0]
  [0.195988 0.255804 0.199167 0.349041]
 ]
]


#### 11.3.2.2. Batch Matrix Multiplication

In [5]:
# Two batches of all-ones matrices: $Q is 2 x (3 x 4), $K is 2 x (4 x 6).
my $Q = mx->nd->ones([2, 3, 4]);
my $K = mx->nd->ones([2, 4, 6]);
# The batched product is compared against shape [2, 3, 6].
# NOTE(review): d2l->check_shape presumably fails on a mismatch - confirm in d2l.
d2l->check_shape(mx->nd->batch_dot($Q, $K), [2, 3, 6]);

1

## 11.3.3. Scaled Dot Product Attention


In [6]:
package DotProductAttention{
  use base qw(AI::MXNet::Gluon::Block); #@save

  # Scaled dot product attention block.
  #
  # Constructor (named arguments, parsed by d2l->get_arguments):
  #   dropout - rate handed to the Gluon Dropout layer that is applied to the
  #             attention weights.
  sub new {
    my ($class, %args) = (shift, d2l->get_arguments(dropout => undef, \@_));
    my $self = $class->SUPER::new();
    $self->{dropout} = mx->gluon->nn->Dropout($args{dropout});

    # The layer is stored directly in the object hash, so it is registered
    # as a child block explicitly.
    $self->register_child($self->{dropout});

    return bless($self, $class);
  }

  # Softmax over the last axis of $X, masking out the positions that lie
  # beyond each row's valid length.
  #
  #   $X          - tensor of scores.
  #   $valid_lens - optional; either 1-D (one length per batch item) or
  #                 higher-rank (flattened to one length per row).
  #
  # Returns a tensor with the same shape as $X.
  sub masked_softmax {
    my ($self, $X, $valid_lens) = @_; #@save

    # Nothing to mask: plain softmax.
    return mx->nd->softmax($X) unless defined $valid_lens;

    my $shape = $X->shape;
    if ($valid_lens->ndim == 1) {
      # One length per batch item: repeat it for each of the item's rows.
      $valid_lens = $valid_lens->repeat($shape->[1]);
    }
    else {
      $valid_lens = $valid_lens->reshape([-1]);
    }

    # On the last axis, replace masked elements with a very large negative
    # value, whose exponentiation outputs 0.
    $X = mx->nd->SequenceMask($X->reshape([-1, $shape->[-1]]), $valid_lens, 1,
                              value => -1e6, axis => 1);
    return mx->nd->softmax($X)->reshape($shape);
  }

  # Computes the attention output for the given queries, keys and values.
  #
  # Shapes as used in this notebook (TODO confirm for other callers):
  #   $queries    - (batch_size, no. of queries, d)
  #   $keys       - (batch_size, no. of key-value pairs, d)
  #   $values     - (batch_size, no. of key-value pairs, value dimension)
  #   $valid_lens - optional, forwarded to masked_softmax.
  #
  # Side effect: the attention weights (before dropout) are stored in
  # $self->{attention_weights}.
  sub forward {
    my ($self, $queries, $keys, $values, $valid_lens) = @_;

    # Scale the dot products by sqrt(d), d being the size of the queries'
    # last axis.
    my $d = $queries->shape->[-1];
    my $scores = mx->nd->batch_dot($queries, $keys, transpose_b => 1) / sqrt($d);

    $self->{attention_weights} = $self->masked_softmax($scores, $valid_lens);

    # The dropout block is invoked as a code reference, the same way the
    # attention block itself is invoked by its callers.
    my $dropped = $self->{dropout}->($self->{attention_weights});
    return mx->nd->batch_dot($dropped, $values);
  }
1;
}

1

In [7]:
# Toy inputs: 2 batch items, each with 1 query of size 2 and 10 key-value
# pairs (keys of size 2, values of size 4).
my $queries = mx->nd->random->normal(0, 1, shape=>[2, 1, 2]);
my $keys = mx->nd->random->normal(0, 1, shape=>[2, 10, 2]);
my $values = mx->nd->random->normal(0, 1, shape=>[2, 10, 4]);
# One valid length per batch item: 2 for the first, 6 for the second.
my $valid_lens = mx->nd->array([2,6]);
my $attention = DotProductAttention->new(dropout=> 0.5);
$attention->initialize();
# The block is invoked as a code reference; the result is compared against
# shape [2, 1, 4].
d2l->check_shape($attention->($queries, $keys, $values, $valid_lens), [2, 1, 4]);

1

In [8]:
# Reshape the stored attention weights to 4-D before plotting.
# NOTE(review): d2l->show_heatmaps presumably expects a 4-D tensor whose last
# two axes are (queries, keys) - confirm against d2l.
my $attention_weights = $attention->{attention_weights}->reshape([1, 1, 2, 10]);
d2l->show_heatmaps($attention_weights, xlabel => 'Keys', ylabel => 'Queries');

## 11.3.4. Additive Attention

In [9]:
package AdditiveAttention{
  use base qw(AI::MXNet::Gluon::Block); #@save

  # Additive attention block.
  #
  # Constructor (named arguments, parsed by d2l->get_arguments):
  #   num_hiddens - output size of the W_k and W_q Dense layers.
  #   dropout     - rate handed to the Gluon Dropout layer that is applied to
  #                 the attention weights.
  sub new {
    my ($class, %args) = (shift, d2l->get_arguments(num_hiddens => undef, dropout => undef, \@_));
    my $self = $class->SUPER::new(%args);

    # All three Dense layers are created without bias and with flatten => 0.
    # NOTE(review): flatten => 0 presumably makes Dense act on the last axis
    # only, leaving the leading axes intact - confirm in the Gluon docs.
    $self->{W_k} = mx->gluon->nn->Dense($args{num_hiddens}, use_bias => 0, flatten => 0);
    $self->{W_q} = mx->gluon->nn->Dense($args{num_hiddens}, use_bias => 0, flatten => 0);
    $self->{w_v} = mx->gluon->nn->Dense(1, use_bias => 0, flatten => 0);
    $self->{dropout} = mx->gluon->nn->Dropout($args{dropout});

    # The layers are stored directly in the object hash, so each one is
    # registered as a child block explicitly.
    $self->register_child($self->{$_}) for qw(W_k W_q w_v dropout);

    return bless($self, $class);
  }

  # Softmax over the last axis of $X, masking out the positions that lie
  # beyond each row's valid length.
  #
  #   $X          - tensor of scores.
  #   $valid_lens - optional; either 1-D (one length per batch item) or
  #                 higher-rank (flattened to one length per row).
  #
  # Returns a tensor with the same shape as $X.
  sub masked_softmax {
    my ($self, $X, $valid_lens) = @_; #@save

    # Nothing to mask: plain softmax.
    return mx->nd->softmax($X) unless defined $valid_lens;

    my $shape = $X->shape;
    if ($valid_lens->ndim == 1) {
      # One length per batch item: repeat it for each of the item's rows.
      $valid_lens = $valid_lens->repeat($shape->[1]);
    }
    else {
      $valid_lens = $valid_lens->reshape([-1]);
    }

    # On the last axis, replace masked elements with a very large negative
    # value, whose exponentiation outputs 0.
    $X = mx->nd->SequenceMask($X->reshape([-1, $shape->[-1]]), $valid_lens, 1,
                              value => -1e6, axis => 1);
    return mx->nd->softmax($X)->reshape($shape);
  }

  # Computes the attention output for the given queries, keys and values.
  #
  #   $valid_lens - optional, forwarded to masked_softmax.
  #
  # Side effect: the attention weights (before dropout) are stored in
  # $self->{attention_weights}.
  sub forward {
    my ($self, $queries, $keys, $values, $valid_lens) = @_;

    # Project queries and keys through their Dense layers.
    $queries = $self->{W_q}->forward($queries);
    $keys    = $self->{W_k}->forward($keys);

    # After dimension expansion, shape of queries: (batch_size, no. of
    # queries, 1, num_hiddens) and shape of keys: (batch_size, 1,
    # no. of key-value pairs, num_hiddens). Sum them up with
    # broadcasting.
    my $features = mx->nd->expand_dims($queries, axis => 2)
                 + mx->nd->expand_dims($keys, axis => 1);
    $features = mx->nd->tanh($features);

    # There is only one output of w_v, so we remove the last
    # one-dimensional entry from the shape. Shape of scores:
    # (batch_size, no. of queries, no. of key-value pairs).
    my $scores = mx->nd->squeeze($self->{w_v}->forward($features), axis => -1);
    $self->{attention_weights} = $self->masked_softmax($scores, $valid_lens);

    # Shape of values: (batch_size, no. of key-value pairs, value
    # dimension).
    my $dropped = $self->{dropout}->forward($self->{attention_weights});
    return mx->nd->batch_dot($dropped, $values);
  }
1;
}

1

In [10]:
# New queries whose last axis (20) differs from the keys' last axis (2).
# $keys, $values and $valid_lens are reused from the earlier
# dot-product-attention cell.
my $queries = mx->nd->random->normal(0, 1, shape=>[2, 1, 20]);
my $attention = AdditiveAttention->new(num_hiddens => 8, dropout => 0.1 );
$attention->initialize();
# The block is invoked as a code reference; the result is compared against
# shape [2, 1, 4].
d2l->check_shape($attention->($queries, $keys, $values, $valid_lens), [2, 1, 4]);

1

In [11]:
# Reshape the stored attention weights to 4-D before plotting.
# NOTE(review): d2l->show_heatmaps presumably expects a 4-D tensor whose last
# two axes are (queries, keys) - confirm against d2l.
my $attention_weights = $attention->{attention_weights}->reshape([1, 1, 2, 10]);
d2l->show_heatmaps($attention_weights, xlabel => 'Keys', ylabel => 'Queries');

In [12]:
# TODO: fix errors

Error: Bareword "ERRORES" not allowed while "strict subs" in use at reply input line 1.

