# Attention for explaining text generation
We would like to know if we can used the attention of a model to explain where he gets his informations to generate an answer.

## Loading a model

In [1]:
import torch
from transformers import AutoTokenizer, AutoModelForCausalLM
torch.set_default_device("cuda")

path_model = "openchat/openchat-3.5-0106" #"microsoft/phi-2"
model = AutoModelForCausalLM.from_pretrained(path_model, torch_dtype="auto", trust_remote_code=True, attn_implementation="eager")
tokenizer = AutoTokenizer.from_pretrained(path_model, trust_remote_code=True)


Loading checkpoint shards:   0%|          | 0/3 [00:00<?, ?it/s]

Special tokens have been added in the vocabulary, make sure the associated word embeddings are fine-tuned or trained.


#### Asking 3 differents questions

In [10]:
question = "What is the most common tree in south of England? Who is the first emperor of france? Who is the last king of France?"
inputs = tokenizer(question, return_tensors="pt")

outputs = model.generate(**inputs, max_length=80,return_dict_in_generate=True, output_attentions= True)


In [11]:
text = tokenizer.batch_decode(outputs[0])[0]

In [12]:
print(text)

<s> What is the most common tree in south of England? Who is the first emperor of france? Who is the last king of France? What is the most common tree in south of England? The most common tree in the south of England is the Oak tree. The first emperor of France was Napoleon Bonaparte. The last king of France was Louis XVI.

## What is the


#### Expectations
We have a structured answer with 3 sentences were each one answer 1 questions
So we except that the attention for generating each answer is highly correlated to question wich is refering.

In [29]:
from transformers.generation import GenerateDecoderOnlyOutput
import numpy as np

class Attentions():
    """
    Class to manage attention given by the outputs of our model.
    """
    
    def __init__(self, outputs: GenerateDecoderOnlyOutput, heads: list[int]):
        self.n = outputs[0].shape[1]
        self.attentions = self.get_attentions(outputs, heads)

    def get_attentions(self, outputs: GenerateDecoderOnlyOutput, heads: list[int]) -> np.array:
        """
        Transform the attention tensor to a nice numpy array 
        where the first argument is the generated token
        and the second one is the attention on one of the previous token.

        Ex attentions[i, j] is the attention used on j for generating i.

        *** heads : is wich heads we looks at (sum is made over all those heads)
        """
        layer = 0 #The layer where we look at the attentionn
        n = outputs[0].shape[1]
        result = np.zeros((n,n))
        attentions = outputs.attentions
        h = len(attentions[0])
        p = len(attentions)
        s = torch.sum(attentions[0][layer].squeeze(), dim =0)
        for i in range(n-p):
            for j in range(i+1):
                result[i+1][j] = s[i][j]      
        for i in range(1,p):
            s = torch.sum(attentions[i][layer].squeeze()[heads], dim =0)
            for j in range(i + n - p):
                result[i+n-p][j] = np.float64(s[j])
        return result/h

    def get_attentions_for_seq(self, seq :np.array) -> np.array:
        """
        Return the attention for all token to a given sequence
        So as input the generated token
        """
        return sum(self.attentions[seq])/len(seq)
        
    def get_attention_from_seq(self, att_seq: np.array, seq :np.array) ->np.float64:
        """
        Return the global attention from att_seq to seq
        Seq are the input token that we are interested in
        """
        return sum(att_seq[seq])/len(seq)

    def attention_from_seq_to_seq(self, from_seq: np.array, for_seq: np.array) -> np.float64:
        """
        For_seq is the generated tokens
        From seq is the based tokens
        """
        return self.get_attention_from_seq(self.get_attentions_for_seq(for_seq), from_seq)
        
    

In [30]:
tensor_outputs = outputs[0][0]
def print_token(i):
   return tokenizer.decode(tensor_outputs[i])
    
def print_token_nb(outputs):
    s= ''
    for i in range(len(outputs[0][0])):
        s +=print_token(i) +'('+str(i)+')'
    print(s)

print_token_nb(outputs)

<s>(0)What(1)is(2)the(3)most(4)common(5)tree(6)in(7)south(8)of(9)England(10)?(11)Who(12)is(13)the(14)first(15)emperor(16)of(17)fr(18)ance(19)?(20)Who(21)is(22)the(23)last(24)king(25)of(26)France(27)?(28)What(29)is(30)the(31)most(32)common(33)tree(34)in(35)south(36)of(37)England(38)?(39)The(40)most(41)common(42)tree(43)in(44)the(45)south(46)of(47)England(48)is(49)the(50)Oak(51)tree(52).(53)The(54)first(55)emperor(56)of(57)France(58)was(59)Napoleon(60)Bon(61)ap(62)arte(63).(64)The(65)last(66)king(67)of(68)France(69)was(70)Louis(71)XVI(72).(73)
(74)
(75)##(76)What(77)is(78)the(79)


In [31]:
input_roi = np.array(range(0,11))
input_emperor = np.array(range(11,20))
input_tree = np.array(range(20,28))

output_roi = np.array(range(40,53))
output_emperor = np.array(range(54,64))
output_tree = np.array(range(64,73))

inputs_seq = [input_roi,input_emperor,input_tree]
outputs_seq = [output_roi,output_emperor,output_tree]


In [32]:
for k in range(32):
    heads = [k]
    att = Attentions(outputs, heads)
    correspondance = np.zeros((3,3))
    for i in range(3):
        for j in range(3):
            correspondance[i,j]= att.attention_from_seq_to_seq(inputs_seq[i], outputs_seq[j])
    print(k)
    print(np.argmax(correspondance*100, axis =0))

0
[0 0 0]
1
[0 0 0]
2
[0 0 0]
3
[0 0 0]
4
[0 0 0]
5
[0 0 0]
6
[0 0 0]
7
[0 0 0]
8
[0 0 0]
9
[0 0 0]
10
[0 0 0]
11
[0 0 0]
12
[0 0 0]
13
[0 0 0]
14
[0 0 0]
15
[0 0 0]
16
[1 1 1]
17
[0 0 0]
18
[0 0 0]
19
[2 0 0]
20
[0 0 0]
21
[0 0 0]
22
[0 0 0]
23
[0 0 0]
24
[0 0 0]
25
[0 0 0]
26
[2 0 0]
27
[0 0 0]
28
[0 0 0]
29
[0 0 0]
30
[2 0 0]
31
[0 0 0]


### Results
Nothing came up on this we cant clearly relate on attention to see what have been used for generation the answer.