<a href="https://colab.research.google.com/github/Rakshith12-pixel/Neural_Machine_Translation/blob/main/dl4cvnlp_nmt.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
from tensorflow import keras
from tensorflow.keras import layers
import tensorflow as tf

**This is the baseline model we will be comparing against**

In [None]:
class TransformerEncoder(layers.Layer):
    """One Transformer encoder block.

    Applies multi-head self-attention followed by a two-layer feed-forward
    projection. Each sub-layer is followed by a residual addition and layer
    normalization. The feed-forward stack projects back to ``embed_dim`` so
    that several blocks can be stacked on top of each other.

    Args:
        embed_dim: Size of the embedding dimension; also used as ``key_dim``
            of the attention layer and as the feed-forward output width.
        dense_dim: Number of units in the hidden feed-forward layer.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        # Hyper-parameters are stored so get_config() can serialize them.
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        self.attention = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        # Output width equals embed_dim so the residual addition in call()
        # lines up and encoder blocks can be stacked.
        self.dense_proj = keras.Sequential(
            [layers.Dense(dense_dim, activation="relu"),
             layers.Dense(embed_dim),]
        )
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()

    def call(self, inputs, mask_=None):
        """Run self-attention and the feed-forward projection on ``inputs``.

        Args:
            inputs: Embedded input sequence; used as both query and value
                (keys default to the values), which makes this self-attention.
            mask_: Optional padding mask. An axis is inserted at position 1
                before it is used as the attention mask
                (assumes a (batch, seq_len) mask - TODO confirm).
                NOTE(review): the argument is named ``mask_`` rather than
                ``mask``, so Keras presumably does not inject a propagated
                mask automatically; callers must pass it explicitly.

        Returns:
            The output of the second layer normalization.
        """
        # Previously this branch read an undefined local `mask`, raising
        # UnboundLocalError whenever a mask was supplied, and the expanded
        # mask never reached the attention layer.
        attention_mask = None
        if mask_ is not None:
            attention_mask = mask_[:, tf.newaxis, :]

        attention_output = self.attention(
            inputs, inputs, attention_mask=attention_mask)
        # Residual connection + layer normalization around each sub-layer.
        proj_input = self.layernorm_1(inputs + attention_output)
        proj_output = self.dense_proj(proj_input)
        return self.layernorm_2(proj_input + proj_output)

    def get_config(self):
        """Return the layer config, including the constructor arguments."""
        config = super().get_config()
        config.update({
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        })
        return config


In [None]:
class PositionalEmbedding(layers.Layer):
    """Embed token ids and add a learned embedding of each token's position.

    Args:
        sequence_length: Number of rows in the position-embedding table, i.e.
            the number of distinct positions that can be embedded.
        input_dim: Token vocabulary size.
        output_dim: Embedding size shared by both embedding tables.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, sequence_length, input_dim, output_dim, **kwargs):
        super().__init__(**kwargs)

        # Plain hyper-parameters, kept so get_config() can report them.
        self.sequence_length = sequence_length
        self.input_dim = input_dim
        self.output_dim = output_dim

        # One table indexed by token id, one indexed by position index.
        self.token_embeddings = layers.Embedding(
            input_dim=input_dim, output_dim=output_dim)
        self.position_embeddings = layers.Embedding(
            input_dim=sequence_length, output_dim=output_dim)

    def call(self, inputs):
        """Return token embeddings plus position embeddings for ``inputs``.

        ``inputs`` is a batch of token-id sequences; the size of its last
        axis determines how many position indices are generated.
        """
        num_positions = tf.shape(inputs)[-1]
        position_ids = tf.range(start=0, limit=num_positions, delta=1)
        # The position vectors broadcast across the batch when added.
        return (self.token_embeddings(inputs)
                + self.position_embeddings(position_ids))

    def compute_mask(self, inputs, mask=None):
        """Generate a mask that is True wherever the token id is non-zero.

        Defining this makes the layer a mask producer; presumably id 0 is the
        padding token - verify against the vectorizer.
        """
        is_nonzero = tf.math.not_equal(inputs, 0)
        return is_nonzero

    def get_config(self):
        """Return the config needed to re-create the layer when loading."""
        base_config = super().get_config()
        return {
            **base_config,
            "output_dim": self.output_dim,
            "sequence_length": self.sequence_length,
            "input_dim": self.input_dim,
        }

In [None]:
class TransformerDecoder(layers.Layer):
    """One Transformer decoder block.

    Runs causally-masked self-attention over the decoder inputs, then
    cross-attention against the encoder outputs, then a two-layer
    feed-forward projection. Each of the three sub-layers is followed by a
    residual addition and layer normalization.

    Args:
        embed_dim: Embedding size; also the ``key_dim`` of both attention
            layers and the feed-forward output width.
        dense_dim: Number of units in the hidden feed-forward layer.
        num_heads: Number of attention heads.
        **kwargs: Forwarded to ``layers.Layer``.
    """

    def __init__(self, embed_dim, dense_dim, num_heads, **kwargs):
        super().__init__(**kwargs)
        self.embed_dim = embed_dim
        self.dense_dim = dense_dim
        self.num_heads = num_heads
        # attention_1 is used for self-attention, attention_2 for
        # cross-attention (see call()).
        self.attention_1 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.attention_2 = layers.MultiHeadAttention(
            num_heads=num_heads, key_dim=embed_dim)
        self.dense_proj = keras.Sequential([
            layers.Dense(dense_dim, activation="relu"),
            layers.Dense(embed_dim),
        ])
        self.layernorm_1 = layers.LayerNormalization()
        self.layernorm_2 = layers.LayerNormalization()
        self.layernorm_3 = layers.LayerNormalization()
        # Let the incoming mask pass through to this layer's outputs.
        self.supports_masking = True

    def get_causal_attention_mask(self, inputs):
        """Build a lower-triangular int32 mask, one copy per batch element.

        Entry (i, j) is 1 when i >= j, so position i may attend only to
        itself and earlier positions. The result has shape
        (batch, seq_len, seq_len), with both sizes read from ``inputs``.
        """
        dims = tf.shape(inputs)
        batch_size, sequence_length = dims[0], dims[1]
        rows = tf.range(sequence_length)[:, tf.newaxis]
        cols = tf.range(sequence_length)
        lower_triangle = tf.cast(rows >= cols, dtype="int32")
        lower_triangle = tf.reshape(
            lower_triangle, (1, sequence_length, sequence_length))
        # Repeat along the batch axis only: multiples = [batch_size, 1, 1].
        repeats = tf.concat(
            [tf.expand_dims(batch_size, -1),
             tf.constant([1, 1], dtype=tf.int32)],
            axis=0)
        return tf.tile(lower_triangle, repeats)

    def call(self, inputs, encoder_outputs, mask_=None):
        """Apply the decoder block.

        Args:
            inputs: Embedded decoder input sequence.
            encoder_outputs: Encoder output, used as key and value of the
                cross-attention.
            mask_: Accepted but currently unused; no padding mask is applied
                to the cross-attention.

        Returns:
            The output of the third layer normalization.
        """
        causal_mask = self.get_causal_attention_mask(inputs)

        # Sub-layer 1: masked self-attention (query = key = value = inputs).
        self_attended = self.attention_1(
            query=inputs,
            value=inputs,
            key=inputs,
            attention_mask=causal_mask)
        self_attended = self.layernorm_1(inputs + self_attended)

        # Sub-layer 2: cross-attention, keys and values come from the encoder.
        cross_attended = self.attention_2(
            query=self_attended,
            value=encoder_outputs,
            key=encoder_outputs,
        )
        cross_attended = self.layernorm_2(self_attended + cross_attended)

        # Sub-layer 3: position-wise feed-forward projection.
        return self.layernorm_3(
            cross_attended + self.dense_proj(cross_attended))

    def get_config(self):
        """Return the layer config, including the constructor arguments."""
        base_config = super().get_config()
        return {
            **base_config,
            "embed_dim": self.embed_dim,
            "num_heads": self.num_heads,
            "dense_dim": self.dense_dim,
        }

In [None]:
# English-to-Spanish translation: baseline model with one encoder block and one decoder block.
# NOTE(review): `vocab_size` is only assigned in the vectorization cell further down, so that
# cell must run before this one. The recorded summary below (5,125,120 parameters per embedding
# layer = 20,020 rows x 256) suggests this cell last ran with a different vocab_size than the
# 15000 set there - TODO confirm.
embed_dim = 256          # embedding width used by every layer
dense_dim = 2048         # hidden width of the feed-forward projections
num_heads = 8            # attention heads per MultiHeadAttention layer
sequence_length = 20     # size of the position-embedding table

# The input names "english"/"spanish" match the dict keys produced by format_dataset.
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs) # Q: First arg acts like a ___ for pos embedding layer
encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x) #Q: What are these arguments?

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="spanish")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs,mask_=None) # Q: What are the call arguments in the picture?

# Dropout, then a softmax over the vocabulary at every decoder position.
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)
transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) # Note that there are two input layers
transformer.summary()



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 english (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 spanish (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 positional_embedding_1 (Po  (None, None, 256)            5125120   ['english[0][0]']             
 sitionalEmbedding)                                                                               
                                                                                                  
 positional_embedding_2 (Po  (None, None, 256)            5125120   ['spanish[0][0]']       

In [None]:
# English-to-Spanish translation: baseline model with one encoder block and one decoder block.
# NOTE(review): this cell repeats the previous model-building cell verbatim; re-running it
# rebinds `transformer` to a freshly initialised model. `vocab_size` must already be defined
# (it is assigned in the vectorization cell further down).
embed_dim = 256          # embedding width used by every layer
dense_dim = 2048         # hidden width of the feed-forward projections
num_heads = 8            # attention heads per MultiHeadAttention layer
sequence_length = 20     # size of the position-embedding table

# The input names "english"/"spanish" match the dict keys produced by format_dataset.
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs) # Q: First arg acts like a ___ for pos embedding layer
encoder_outputs = TransformerEncoder(embed_dim, dense_dim, num_heads)(x) #Q: What are these arguments?

decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="spanish")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)
x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs,mask_=None) # Q: What are the call arguments in the picture?

# Dropout, then a softmax over the vocabulary at every decoder position.
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)
transformer = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs) # Note that there are two input layers
transformer.summary()



Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 english (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 spanish (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 positional_embedding_1 (Po  (None, None, 256)            5125120   ['english[0][0]']             
 sitionalEmbedding)                                                                               
                                                                                                  
 positional_embedding_2 (Po  (None, None, 256)            5125120   ['spanish[0][0]']       

## Preparing the data

Download the data for English-to-Spanish translation.


In [None]:
# Fetch the spa-eng archive and unpack it quietly; later cells read spa-eng/spa.txt from it.
!wget http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip
!unzip -q spa-eng.zip

--2023-11-26 12:15:49--  http://storage.googleapis.com/download.tensorflow.org/data/spa-eng.zip
Resolving storage.googleapis.com (storage.googleapis.com)... 142.250.145.207, 74.125.128.207, 74.125.143.207, ...
Connecting to storage.googleapis.com (storage.googleapis.com)|142.250.145.207|:80... connected.
HTTP request sent, awaiting response... 200 OK
Length: 2638744 (2.5M) [application/zip]
Saving to: ‘spa-eng.zip’


2023-11-26 12:15:49 (3.77 MB/s) - ‘spa-eng.zip’ saved [2638744/2638744]



In [None]:
# Show the last lines of the corpus: each line is an English sentence, a tab, then its Spanish translation.
!tail spa-eng/spa.txt

You can't view Flash content on an iPad. However, you can easily email yourself the URLs of these web pages and view that content on your regular computer when you get home.	No puedes ver contenido en Flash en un iPad. Sin embargo, puedes fácilmente enviarte por correo electrónico las URL's de esas páginas web y ver el contenido en tu computadora cuando llegas a casa.
A mistake young people often make is to start learning too many languages at the same time, as they underestimate the difficulties and overestimate their own ability to learn them.	Un error que cometen a menudo los jóvenes es el de comenzar a aprender demasiadas lenguas al mismo tiempo, porque subestiman sus dificultades y sobrestiman sus propias capacidades para aprenderlas.
No matter how much you try to convince people that chocolate is vanilla, it'll still be chocolate, even though you may manage to convince yourself and a few others that it's vanilla.	No importa cuánto insistas en convencer a la gente de que el chocol

In [None]:
# Pre-processing: split every corpus line into an (english, spanish) pair.
import random

text_file = "spa-eng/spa.txt"
# The corpus contains accented Spanish characters, so decode it explicitly as
# UTF-8 instead of relying on the platform's default encoding.
with open(text_file, encoding="utf-8") as f:
    lines = f.read().split("\n")[:-1]   # drop the empty string after the final newline

text_pairs = []
for line in lines:
    english, spanish = line.split("\t")
    # Only the target side is wrapped in "[start]"/"[end]" markers: the target
    # is later split into a decoder input and a one-token-shifted decoder
    # output (see format_dataset), while the source is fed to the encoder as is.
    spanish = "[start] " + spanish + " [end]"
    text_pairs.append((english, spanish))

# Sanity check: print one random pair and the corpus size.
print(random.choice(text_pairs))
print(f"no. of pairs: {len(text_pairs)}")

('Tom is just like you.', '[start] Tom es tal y como tú. [end]')
no. of pairs: 118964


In [None]:
# Shuffle the pairs in place, then cut them into train / validation / test.
# Validation and test each get 15% of the pairs; training gets the remainder.
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
val_end = num_train_samples + num_val_samples
train_pairs, val_pairs, test_pairs = (
    text_pairs[:num_train_samples],
    text_pairs[num_train_samples:val_end],
    text_pairs[val_end:],
)

In [None]:
# Inspect the ASCII punctuation set that the Spanish standardization strips (minus "[" and "]").
import string
print(string.punctuation)

!"#$%&'()*+,-./:;<=>?@[\]^_`{|}~


In [None]:
# Turn the English and Spanish sentences into fixed-length integer sequences.
import tensorflow as tf
import string
import re

vocab_size = 15000
sequence_length = 20

# Characters deleted from the Spanish text: all ASCII punctuation plus "¿",
# except "[" and "]", which must survive so the "[start]" and "[end]" markers
# stay intact.
strip_chars = "".join(
    ch for ch in string.punctuation + "¿" if ch not in "[]"
)


def custom_standardization(input_string):
    """Lower-case ``input_string`` and delete every character in ``strip_chars``."""
    strip_pattern = f"[{re.escape(strip_chars)}]"
    lowered = tf.strings.lower(input_string)
    return tf.strings.regex_replace(lowered, strip_pattern, "")


# English side: the layer's default standardization, sequences of length 20.
source_vectorization = layers.TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length,
)
# Spanish side: one token longer, because format_dataset slices the target
# into a decoder input ([:, :-1]) and a decoder output ([:, 1:]).
target_vectorization = layers.TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length + 1,
    standardize=custom_standardization,
)

# Learn both vocabularies from the training split only.
train_english_texts = [english_text for english_text, _ in train_pairs]
train_spanish_texts = [spanish_text for _, spanish_text in train_pairs]
source_vectorization.adapt(train_english_texts)
target_vectorization.adapt(train_spanish_texts)


In [None]:
# Shuffle the pairs in place, then cut them into train / validation / test
# (15% validation, 15% test, remainder training).
# NOTE(review): this cell repeats the earlier split cell. Re-running it reshuffles
# `text_pairs`, so `train_pairs` changes and the vectorizers must be re-adapted afterwards.
random.shuffle(text_pairs)
num_val_samples = int(0.15 * len(text_pairs))
num_train_samples = len(text_pairs) - 2 * num_val_samples
train_pairs = text_pairs[:num_train_samples]
val_pairs = text_pairs[num_train_samples:num_train_samples + num_val_samples]
test_pairs = text_pairs[num_train_samples + num_val_samples:]

In [None]:
# Vectorizing the English and Spanish text pairs
# NOTE(review): this cell repeats the earlier vectorization cell; re-running it re-adapts
# both vectorizers on the current `train_pairs`.
import tensorflow as tf
import string
import re

# Characters to strip from the Spanish data: all standard punctuation plus "¿",
# but NOT "[" and "]", which are kept so the "[start]"/"[end]" markers survive.
strip_chars = string.punctuation + "¿"  # standard punctuation + the extra Spanish mark
strip_chars = strip_chars.replace("[", "")
strip_chars = strip_chars.replace("]", "")
# strip_chars is now: !"#$%&'()*+,-./:;<=>?@\^_`{|}~¿

# Custom standardization function for Spanish
def custom_standardization(input_string):
    """Lower-case ``input_string`` and delete every character in ``strip_chars``."""
    lowercase = tf.strings.lower(input_string)
    return tf.strings.regex_replace(    # replace every match of the character class with ""
        lowercase, f"[{re.escape(strip_chars)}]", "")

vocab_size = 15000
sequence_length = 20

# English side: the layer's default standardization, sequences of length 20.
source_vectorization = layers.TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length,
)
# Spanish side: one token longer, because format_dataset slices the target into a
# decoder input ([:, :-1]) and a decoder output ([:, 1:]).
target_vectorization = layers.TextVectorization(
    max_tokens=vocab_size,
    output_mode="int",
    output_sequence_length=sequence_length + 1,
    standardize=custom_standardization,
)
# Learn both vocabularies from the training split only.
train_english_texts = [pair[0] for pair in train_pairs]
train_spanish_texts = [pair[1] for pair in train_pairs]
source_vectorization.adapt(train_english_texts)
target_vectorization.adapt(train_spanish_texts)


In [None]:
# Toy illustration of the one-step shift between decoder input and decoder output:
# the input drops the last element, the output drops the first.
seq = tf.range(10)
dec_in, dec_out = seq[:-1], seq[1:]

for label, tensor in (
    ("original seq", seq),
    ("dec_in", dec_in),
    ("dec_out", dec_out),
):
    print(label)
    print(tensor)

original seq
tf.Tensor([0 1 2 3 4 5 6 7 8 9], shape=(10,), dtype=int32)
dec_in
tf.Tensor([0 1 2 3 4 5 6 7 8], shape=(9,), dtype=int32)
dec_out
tf.Tensor([1 2 3 4 5 6 7 8 9], shape=(9,), dtype=int32)


In [None]:
# Preparing datasets for the translation task

batch_size = 64  # number of sentence pairs per batch; read by make_dataset

def format_dataset(eng, spa):
    """Vectorize a batch of sentence pairs into ``(inputs, targets)``.

    ``eng`` and ``spa`` arrive as batches of raw strings and are converted to
    integer token ids. The Spanish ids are then sliced along the token axis:
    everything but the last token feeds the decoder, everything but the first
    token is the prediction target.

    Returns:
        A tuple ``({"english": ..., "spanish": ...}, targets)`` whose dict keys
        match the names of the model's two input layers.
    """
    source_ids = source_vectorization(eng)
    target_ids = target_vectorization(spa)
    decoder_input = target_ids[:, :-1]
    decoder_target = target_ids[:, 1:]
    model_inputs = {
        "english": source_ids,      # encoder input
        "spanish": decoder_input,   # decoder input
    }
    return model_inputs, decoder_target

def make_dataset(pairs):
    """Build a batched, vectorized ``tf.data`` pipeline from sentence pairs.

    Args:
        pairs: Non-empty sequence of ``(english, spanish)`` string tuples.

    Returns:
        A dataset yielding ``(inputs, targets)`` batches as produced by
        ``format_dataset``, in batches of the module-level ``batch_size``.
    """
    eng_texts, spa_texts = zip(*pairs)
    dataset = tf.data.Dataset.from_tensor_slices(
        (list(eng_texts), list(spa_texts)))
    dataset = dataset.batch(batch_size)
    dataset = dataset.map(format_dataset, num_parallel_calls=4)
    # Cache the vectorized batches first and shuffle afterwards. Caching after
    # shuffle (as before) stores one fixed order, so every epoch would replay
    # the batches in exactly the same sequence. Prefetch goes last so it
    # overlaps input preparation with training.
    return dataset.cache().shuffle(2048).prefetch(16)

# Build the training and validation pipelines; `test_pairs` is not turned into a dataset here.
train_ds = make_dataset(train_pairs)
val_ds = make_dataset(val_pairs)

In [None]:
# Peek at a single training batch: print the shapes of both inputs and of the
# targets, plus one target row and the matching English row.
for inputs, targets in train_ds.take(1):
    english_batch = inputs['english']
    spanish_batch = inputs['spanish']
    print(f"inputs['english'].shape: {english_batch.shape}")
    print(f"inputs['spanish'].shape: {spanish_batch.shape}")
    print(f"targets.shape: {targets.shape}")
    print(targets[3])
    print(english_batch[3])

inputs['english'].shape: (64, 20)
inputs['spanish'].shape: (64, 20)
targets.shape: (64, 20)
tf.Tensor(
[2767    6   51   44   81    6   41    4   32  630   65   20  154    3
    0    0    0    0    0    0], shape=(20,), dtype=int64)
tf.Tensor(
[  3  75 110  62 109  58 688 148 106   0   0   0   0   0   0   0   0   0
   0   0], shape=(20,), dtype=int64)


## Training and evaluating the model

In [None]:
# Removed `!pip install transformer`: pip finds no matching distribution, and nothing
# needs one - `transformer` below is the keras.Model built earlier in this notebook.

[31mERROR: Could not find a version that satisfies the requirement transformer (from versions: none)[0m[31m
[0m[31mERROR: No matching distribution found for transformer[0m[31m
[0m

In [None]:
# Removed `import transformer`: no such module is installed (it raised ModuleNotFoundError),
# and a successful import would have shadowed the `transformer` keras.Model compiled below.

ModuleNotFoundError: ignored

In [None]:
# Compile and train the baseline model.
# The targets are integer token ids and the model ends in a softmax over the vocabulary,
# hence the sparse categorical cross-entropy loss.
transformer.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]) # other metrics such as BLEU could be added
# The History object returned by fit() is not stored, so the loss curves cannot be plotted later.
transformer.fit(train_ds, epochs=30, validation_data=val_ds)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7df3c36e6020>

Note that both the TransformerEncoder and the TransformerDecoder are shape-invariant, so you can stack many of them to create a more powerful encoder or decoder.

**Trying different architectures**

Model having 3 encoder and decoder layers, to test how the depth of the network affects accuracy

In [None]:
# Deeper variant: 3 encoder blocks and 3 decoder blocks, same hyper-parameters as the baseline.
embed_dim = 256
dense_dim = 2048
num_heads = 8
sequence_length = 20

# Encoder
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)

# Add 3 encoder layers; each iteration instantiates a new layer, so weights are not shared.
for _ in range(3):
    x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)

encoder_outputs = x  # Output of the final encoder layer

# Decoder
decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="spanish")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)

# Add 3 decoder layers; every one of them attends to the final encoder output.
for _ in range(3):
    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs, mask_=None)

# Dropout, then a softmax over the vocabulary at every decoder position.
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)

# Model
transformer_2 = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
transformer_2.summary()


Model: "model"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 english (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 positional_embedding (Posi  (None, None, 256)            3845120   ['english[0][0]']             
 tionalEmbedding)                                                                                 
                                                                                                  
 transformer_encoder (Trans  (None, None, 256)            3155456   ['positional_embedding[0][0]']
 formerEncoder)                                                                                   
                                                                                              

In [None]:
# Compile the 3-layer model with the same optimizer, loss and metric as the baseline.
transformer_2.compile(
    optimizer="rmsprop",
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"]) # other metrics such as BLEU could be added


In [None]:
# Train the 3-layer model for 30 epochs; the returned History object is not stored here.
transformer_2.fit(train_ds, epochs=30, validation_data=val_ds)

Epoch 1/30
Epoch 2/30
Epoch 3/30
Epoch 4/30
Epoch 5/30
Epoch 6/30
Epoch 7/30
Epoch 8/30
Epoch 9/30
Epoch 10/30
Epoch 11/30
Epoch 12/30
Epoch 13/30
Epoch 14/30
Epoch 15/30
Epoch 16/30
Epoch 17/30
Epoch 18/30
Epoch 19/30
Epoch 20/30
Epoch 21/30
Epoch 22/30
Epoch 23/30
Epoch 24/30
Epoch 25/30
Epoch 26/30
Epoch 27/30
Epoch 28/30
Epoch 29/30
Epoch 30/30


<keras.src.callbacks.History at 0x7cae9c4fdc60>

**We observe much poorer performance with the deeper vanilla Transformer. Let us try increasing the depth once more...**

In [None]:
# Even deeper variant: 9 encoder blocks and 9 decoder blocks, same hyper-parameters as before.
embed_dim = 256
dense_dim = 2048
num_heads = 8
sequence_length = 20

# Encoder
encoder_inputs = keras.Input(shape=(None,), dtype="int64", name="english")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(encoder_inputs)

# Add 9 encoder layers; each iteration instantiates a new layer, so weights are not shared.
for _ in range(9):
    x = TransformerEncoder(embed_dim, dense_dim, num_heads)(x)

encoder_outputs = x  # Output of the final encoder layer

# Decoder
decoder_inputs = keras.Input(shape=(None,), dtype="int64", name="spanish")
x = PositionalEmbedding(sequence_length, vocab_size, embed_dim)(decoder_inputs)

# Add 9 decoder layers; every one of them attends to the final encoder output.
for _ in range(9):
    x = TransformerDecoder(embed_dim, dense_dim, num_heads)(x, encoder_outputs, mask_=None)

# Dropout, then a softmax over the vocabulary at every decoder position.
x = layers.Dropout(0.5)(x)
decoder_outputs = layers.Dense(vocab_size, activation="softmax")(x)

# Model
transformer_3 = keras.Model([encoder_inputs, decoder_inputs], decoder_outputs)
transformer_3.summary()

Model: "model_1"
__________________________________________________________________________________________________
 Layer (type)                Output Shape                 Param #   Connected to                  
 english (InputLayer)        [(None, None)]               0         []                            
                                                                                                  
 positional_embedding_2 (Po  (None, None, 256)            3845120   ['english[0][0]']             
 sitionalEmbedding)                                                                               
                                                                                                  
 transformer_encoder_3 (Tra  (None, None, 256)            3155456   ['positional_embedding_2[0][0]
 nsformerEncoder)                                                   ']                            
                                                                                            

In [None]:
import matplotlib.pyplot as plt

In [None]:
# Train while keeping the History object, then animate the loss curves epoch by epoch.
# NOTE(review): this fits transformer_2 again; the 9-layer transformer_3 built above is
# never compiled or trained anywhere in this notebook - confirm which model was intended.
history = transformer_2.fit(train_ds, epochs=30, validation_data=val_ds)

import matplotlib.pyplot as plt
from matplotlib.animation import FuncAnimation

train_loss = history.history['loss']
val_loss = history.history['val_loss']

# One frame per recorded epoch (previously hard-coded to 10, although 30 are trained).
num_epochs = len(train_loss)

# Create a figure and axis for the live plot
fig, ax = plt.subplots()
ax.set_title('Dynamic Loss Update')
ax.set_xlabel('Epochs')
ax.set_ylabel('Loss')

# Line2D.set_data() does not rescale the axes, so the limits are fixed up front;
# otherwise the curves are drawn outside the default view and the plot looks empty.
all_losses = train_loss + val_loss
loss_min, loss_max = min(all_losses), max(all_losses)
padding = 0.05 * (loss_max - loss_min) or 0.05   # avoid a zero-height axis
ax.set_xlim(1, max(num_epochs, 2))
ax.set_ylim(loss_min - padding, loss_max + padding)

# Initialize empty lines for training and validation loss
line_train, = ax.plot([], [], label='Training Loss')
line_val, = ax.plot([], [], label='Validation Loss')
ax.legend()

def update(epoch):
    """Draw both loss curves up to and including ``epoch`` (1-based)."""
    epochs = list(range(1, epoch + 1))
    line_train.set_data(epochs, train_loss[:epoch])
    line_val.set_data(epochs, val_loss[:epoch])
    return line_train, line_val

# Keep a reference to the animation so it is not garbage-collected before it is shown.
ani = FuncAnimation(fig, update, frames=range(1, num_epochs + 1), blit=True)

plt.show()


NameError: ignored