In [1]:
#1.Write a code to generate a random sentence using probabilistic modeling
#(Markov Chain). Use the sentence "The cat is on the mat" as an exampl?

In [2]:
import random

def build_markov_chain(text):
    words = text.split()
    chain = {}
    for i in range(len(words) - 1):
        word = words[i]
        next_word = words[i + 1]
        chain.setdefault(word, []).append(next_word)
    return chain

def generate_sentence(chain, length=10):
    word = random.choice(list(chain.keys()))
    sentence = [word]
    for _ in range(length - 1):
        next_words = chain.get(word, None)
        if not next_words:
            break
        word = random.choice(next_words)
        sentence.append(word)
    return " ".join(sentence)

text = "The cat is on the mat"
markov_chain = build_markov_chain(text)
random_sentence = generate_sentence(markov_chain, length=6)
print("Generated sentence:", random_sentence)


Generated sentence: is on the mat


In [3]:
#2.Build a simple Autoencoder model using Keras to learn a compressed
#representation of a given sentence. Use a dataset of your choice?


In [4]:
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Dense


input_dim = 100 
encoding_dim = 32  

input_layer = Input(shape=(input_dim,))
encoded = Dense(encoding_dim, activation='relu')(input_layer)
decoded = Dense(input_dim, activation='sigmoid')(encoded)

autoencoder = Model(input_layer, decoded)


autoencoder.compile(optimizer='adam', loss='mse')

# Example training
import numpy as np
data = np.random.rand(1000, input_dim)  # Random dataset for demonstration
autoencoder.fit(data, data, epochs=10, batch_size=32, validation_split=0.2)

encoder = Model(input_layer, encoded)
print("Autoencoder built successfully!")


Epoch 1/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 17ms/step - loss: 0.0865 - val_loss: 0.0834
Epoch 2/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0832 - val_loss: 0.0831
Epoch 3/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 8ms/step - loss: 0.0828 - val_loss: 0.0828
Epoch 4/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0820 - val_loss: 0.0823
Epoch 5/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0813 - val_loss: 0.0814
Epoch 6/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 9ms/step - loss: 0.0800 - val_loss: 0.0802
Epoch 7/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0786 - val_loss: 0.0788
Epoch 8/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 7ms/step - loss: 0.0764 - val_loss: 0.0774
Epoch 9/10
[1m25/25[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [7]:
#3.Use the Hugging Face transformers library to fine-tune a pre-trained GPT-2
#model on a custom text data and generate text

In [10]:
#4.Implement a text generation model using a simple Recurrent Neural
##Network (RNN) in Keras. Train the model on a custom data and generate a
#word?

In [12]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, SimpleRNN, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Step 1: Prepare the custom dataset
def prepare_dataset(text, max_sequence_length=5):

    # Tokenize the text
    tokenizer = Tokenizer()
    tokenizer.fit_on_texts([text])
    sequences = tokenizer.texts_to_sequences([text])[0]

    # Create input-output pairs
    X, y = [], []
    for i in range(len(sequences) - max_sequence_length):
        X.append(sequences[i:i + max_sequence_length])
        y.append(sequences[i + max_sequence_length])

    X = np.array(X)
    y = np.array(y)

    return X, y, tokenizer, len(tokenizer.word_index) + 1

# Example custom text data
text_data = "Once upon a time there was a king who ruled over a great kingdom with wisdom and courage."

# Prepare the dataset
max_sequence_length = 5
X, y, tokenizer, vocab_size = prepare_dataset(text_data, max_sequence_length)

# Step 2: Build the RNN model
model = Sequential([
    Embedding(input_dim=vocab_size, output_dim=10), 
    SimpleRNN(64, return_sequences=False),
    Dense(vocab_size, activation='softmax')
])

model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()


model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
model.summary()

# Step 3: Train the model
model.fit(X, y, epochs=100, verbose=2)

# Step 4: Generate text
def generate_text(prompt, num_words, model, tokenizer, max_sequence_length):
  
    result = prompt
    for _ in range(num_words):
        # Tokenize the prompt
        encoded = tokenizer.texts_to_sequences([result.split()])[-1]
        encoded = pad_sequences([encoded], maxlen=max_sequence_length, truncating='pre')

        # Predict the next word
        prediction = np.argmax(model.predict(encoded), axis=-1)
        next_word = tokenizer.index_word[prediction[0]]

        # Append the predicted word to the result
        result += " " + next_word

    return result

# Generate text
prompt = "Once upon a"
generated_text = generate_text(prompt, num_words=10, model=model, tokenizer=tokenizer, max_sequence_length=max_sequence_length)
print(f"Generated Text: {generated_text}")


Epoch 1/100
1/1 - 2s - 2s/step - accuracy: 0.0000e+00 - loss: 2.8442
Epoch 2/100
1/1 - 0s - 79ms/step - accuracy: 0.0000e+00 - loss: 2.8267
Epoch 3/100
1/1 - 0s - 74ms/step - accuracy: 0.0769 - loss: 2.8092
Epoch 4/100
1/1 - 0s - 78ms/step - accuracy: 0.2308 - loss: 2.7915
Epoch 5/100
1/1 - 0s - 80ms/step - accuracy: 0.2308 - loss: 2.7736
Epoch 6/100
1/1 - 0s - 88ms/step - accuracy: 0.3077 - loss: 2.7552
Epoch 7/100
1/1 - 0s - 85ms/step - accuracy: 0.3846 - loss: 2.7361
Epoch 8/100
1/1 - 0s - 84ms/step - accuracy: 0.3846 - loss: 2.7162
Epoch 9/100
1/1 - 0s - 78ms/step - accuracy: 0.3846 - loss: 2.6953
Epoch 10/100
1/1 - 0s - 76ms/step - accuracy: 0.3846 - loss: 2.6732
Epoch 11/100
1/1 - 0s - 116ms/step - accuracy: 0.3846 - loss: 2.6497
Epoch 12/100
1/1 - 0s - 80ms/step - accuracy: 0.4615 - loss: 2.6247
Epoch 13/100
1/1 - 0s - 82ms/step - accuracy: 0.3846 - loss: 2.5980
Epoch 14/100
1/1 - 0s - 78ms/step - accuracy: 0.3846 - loss: 2.5695
Epoch 15/100
1/1 - 0s - 81ms/step - accuracy: 0.38

In [13]:
#5.Write a program to generate a sequence of text using an LSTM-based
#model in TensorFlow, trained on a custom data of sentences?

In [14]:

from tensorflow.keras.layers import LSTM, Embedding, Dense, Dropout


# Sample custom dataset (replace with your own data)
data = [
    "The sun rises in the east",
    "The moon shines at night",
    "The stars twinkle in the sky",
    "The earth orbits the sun",
    "The cat sat on the mat"
]

# Step 1: Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
total_words = len(tokenizer.word_index) + 1  # Include padding token

# Generate input sequences
input_sequences = []
for line in data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i + 1]
        input_sequences.append(n_gram_sequence)

# Pad sequences and create features/labels
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

# Step 2: Build the LSTM model
model = Sequential([
    Embedding(total_words, 50, input_length=max_sequence_len - 1),
    LSTM(150),
    Dropout(0.2),
    Dense(total_words, activation='softmax')
])

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Step 3: Train the model
history = model.fit(X, y, epochs=100, verbose=2)

# Step 4: Generate text
def generate_text(seed_text, next_words, model, max_sequence_len):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        next_word = tokenizer.index_word[np.argmax(predicted)]
        seed_text += " " + next_word
    return seed_text

# Example usage
seed_text = "The sun"
generated_text = generate_text(seed_text, next_words=10, model=model, max_sequence_len=max_sequence_len)
print("Generated text:", generated_text)


Epoch 1/100
1/1 - 3s - 3s/step - accuracy: 0.0000e+00 - loss: 2.9461
Epoch 2/100
1/1 - 0s - 81ms/step - accuracy: 0.0870 - loss: 2.9413
Epoch 3/100
1/1 - 0s - 85ms/step - accuracy: 0.1304 - loss: 2.9380
Epoch 4/100
1/1 - 0s - 86ms/step - accuracy: 0.2609 - loss: 2.9314
Epoch 5/100
1/1 - 0s - 84ms/step - accuracy: 0.2174 - loss: 2.9257
Epoch 6/100
1/1 - 0s - 89ms/step - accuracy: 0.1739 - loss: 2.9218
Epoch 7/100
1/1 - 0s - 69ms/step - accuracy: 0.1739 - loss: 2.9185
Epoch 8/100
1/1 - 0s - 77ms/step - accuracy: 0.1739 - loss: 2.9127
Epoch 9/100
1/1 - 0s - 80ms/step - accuracy: 0.1739 - loss: 2.9059
Epoch 10/100
1/1 - 0s - 78ms/step - accuracy: 0.1739 - loss: 2.8963
Epoch 11/100
1/1 - 0s - 86ms/step - accuracy: 0.1739 - loss: 2.8921
Epoch 12/100
1/1 - 0s - 72ms/step - accuracy: 0.1739 - loss: 2.8889
Epoch 13/100
1/1 - 0s - 98ms/step - accuracy: 0.1739 - loss: 2.8777
Epoch 14/100
1/1 - 0s - 66ms/step - accuracy: 0.1739 - loss: 2.8656
Epoch 15/100
1/1 - 0s - 89ms/step - accuracy: 0.1739 - 

In [15]:
#6.Build a program that uses GPT-2 from Hugging Face to generate a story
#based on a custom promptI

In [18]:
!pip install transformers




DEPRECATION: Loading egg at c:\users\mohd yusuf haider\anaconda3\lib\site-packages\fault_detection-0.0.0-py3.12.egg is deprecated. pip 24.3 will enforce this behaviour change. A possible replacement is to use pip for package installation. Discussion can be found at https://github.com/pypa/pip/issues/12330





In [19]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer

# Load pre-trained GPT-2 model and tokenizer from Hugging Face
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Function to generate a story based on the custom prompt
def generate_story(prompt, max_length=200, temperature=0.7, top_p=0.9):
    # Encode the input prompt
    input_ids = tokenizer.encode(prompt, return_tensors='pt')

    # Generate the output sequence
    output = model.generate(
        input_ids,
        max_length=max_length,
        temperature=temperature,
        top_p=top_p,
        num_return_sequences=1,
        no_repeat_ngram_size=2,
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return the generated text
    story = tokenizer.decode(output[0], skip_special_tokens=True)
    return story

# Example custom prompt
prompt = "Once upon a time in a distant kingdom, there was a young prince who"

# Generate the story
story = generate_story(prompt, max_length=200, temperature=0.7, top_p=0.9)

# Print the generated story
print("Generated Story:\n")
print(story)


tokenizer_config.json:   0%|          | 0.00/26.0 [00:00<?, ?B/s]

To support symlinks on Windows, you either need to activate Developer Mode or to run Python as an administrator. In order to activate developer mode, see this article: https://docs.microsoft.com/en-us/windows/apps/get-started/enable-your-device-for-development


vocab.json:   0%|          | 0.00/1.04M [00:00<?, ?B/s]

merges.txt:   0%|          | 0.00/456k [00:00<?, ?B/s]

tokenizer.json:   0%|          | 0.00/1.36M [00:00<?, ?B/s]

config.json:   0%|          | 0.00/665 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/548M [00:00<?, ?B/s]

generation_config.json:   0%|          | 0.00/124 [00:00<?, ?B/s]

The attention mask is not set and cannot be inferred from input because pad token is same as eos token. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.


Generated Story:

Once upon a time in a distant kingdom, there was a young prince who was the son of a nobleman. He was called the Prince of the House of King's Landing.

The Prince was born in the year of Aes Sedai, and was raised in his father's house. His father was an old man, but he was very good at his craft. The Prince had a great deal of knowledge of magic, as well as of history. When he had been a child, he learned to read and write, to write and speak, so that he could write. But he did not know how to speak. So he began to learn to use magic. And when he came to the kingdom of Westeros, his mother was there, with her husband, the King of Winterfell. She was not a very well-educated woman, for she was only a little girl. Her father had died when she had grown up, when her father died, she grew up in an


In [20]:
#7.Write a code to implement a simple text generation model using a GRUbased architecture in Keras?

In [22]:

from tensorflow.keras.layers import GRU, Embedding, Dense, Dropout



data = [
    "The sun rises in the east",
    "The moon shines at night",
    "The stars twinkle in the sky",
    "The earth orbits the sun",
    "The cat sat on the mat"
]

# Step 1: Tokenize the text data
tokenizer = Tokenizer()
tokenizer.fit_on_texts(data)
total_words = len(tokenizer.word_index) + 1  

# Generate input sequences
input_sequences = []
for line in data:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i + 1]
        input_sequences.append(n_gram_sequence)

# Pad sequences and create features/labels
max_sequence_len = max([len(seq) for seq in input_sequences])
input_sequences = pad_sequences(input_sequences, maxlen=max_sequence_len, padding='pre')

X = input_sequences[:, :-1]
y = tf.keras.utils.to_categorical(input_sequences[:, -1], num_classes=total_words)

# Step 2: Build the GRU model
# Step 2: Build the GRU model
model = Sequential([
    Embedding(total_words, 50, input_length=max_sequence_len - 1),  # Remove input_length argument
    GRU(150),
    Dropout(0.2),
    Dense(total_words, activation='softmax')
])


model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])
model.summary()

# Step 3: Train the model
history = model.fit(X, y, epochs=100, verbose=2)

# Step 4: Generate text
def generate_text(seed_text, next_words, model, max_sequence_len):
    for _ in range(next_words):
        token_list = tokenizer.texts_to_sequences([seed_text])[0]
        token_list = pad_sequences([token_list], maxlen=max_sequence_len - 1, padding='pre')
        predicted = model.predict(token_list, verbose=0)
        next_word = tokenizer.index_word[np.argmax(predicted)]
        seed_text += " " + next_word
    return seed_text

# Example usage
seed_text = "The sun"
generated_text = generate_text(seed_text, next_words=10, model=model, max_sequence_len=max_sequence_len)
print("Generated text:\n")
print(generated_text)


Epoch 1/100
1/1 - 4s - 4s/step - accuracy: 0.0435 - loss: 2.9437
Epoch 2/100
1/1 - 0s - 79ms/step - accuracy: 0.1739 - loss: 2.9369
Epoch 3/100
1/1 - 0s - 78ms/step - accuracy: 0.3043 - loss: 2.9281
Epoch 4/100
1/1 - 0s - 75ms/step - accuracy: 0.2609 - loss: 2.9177
Epoch 5/100
1/1 - 0s - 75ms/step - accuracy: 0.2174 - loss: 2.9118
Epoch 6/100
1/1 - 0s - 80ms/step - accuracy: 0.1739 - loss: 2.9066
Epoch 7/100
1/1 - 0s - 81ms/step - accuracy: 0.2174 - loss: 2.8971
Epoch 8/100
1/1 - 0s - 85ms/step - accuracy: 0.2174 - loss: 2.8835
Epoch 9/100
1/1 - 0s - 86ms/step - accuracy: 0.2174 - loss: 2.8760
Epoch 10/100
1/1 - 0s - 76ms/step - accuracy: 0.1739 - loss: 2.8652
Epoch 11/100
1/1 - 0s - 78ms/step - accuracy: 0.1739 - loss: 2.8604
Epoch 12/100
1/1 - 0s - 83ms/step - accuracy: 0.1739 - loss: 2.8479
Epoch 13/100
1/1 - 0s - 81ms/step - accuracy: 0.1739 - loss: 2.8391
Epoch 14/100
1/1 - 0s - 66ms/step - accuracy: 0.1739 - loss: 2.8243
Epoch 15/100
1/1 - 0s - 71ms/step - accuracy: 0.1739 - loss

In [23]:
#8.I Create a script to implement GPT-2-based text generation with beam
#search decoding to generate text?

In [24]:
from transformers import GPT2LMHeadModel, GPT2Tokenizer
import torch

# Load the pre-trained GPT-2 model and tokenizer from Hugging Face
model_name = "gpt2"
tokenizer = GPT2Tokenizer.from_pretrained(model_name)
model = GPT2LMHeadModel.from_pretrained(model_name)

# Function to generate text with beam search decoding
def generate_text_with_beam_search(prompt, num_beams=5, max_length=100, temperature=1.0, no_repeat_ngram_size=2):
    # Encode the input prompt to tokens
    input_ids = tokenizer.encode(prompt, return_tensors='pt')
    
    # Generate text using beam search
    beam_output = model.generate(
        input_ids,
        max_length=max_length,
        num_beams=num_beams,
        temperature=temperature,
        no_repeat_ngram_size=no_repeat_ngram_size,
        early_stopping=True
    )

    # Decode the output sequence to text
    generated_text = tokenizer.decode(beam_output[0], skip_special_tokens=True)
    
    return generated_text

# Example usage
prompt = "Once upon a time in a distant kingdom, there was a brave knight who"
generated_text = generate_text_with_beam_search(prompt, num_beams=5, max_length=100)

# Print the generated story
print("Generated Story with Beam Search:\n")
print(generated_text)


The attention mask and the pad token id were not set. As a consequence, you may observe unexpected behavior. Please pass your input's `attention_mask` to obtain reliable results.
Setting `pad_token_id` to `eos_token_id`:50256 for open-end generation.


Generated Story with Beam Search:

Once upon a time in a distant kingdom, there was a brave knight who fought valiantly against the enemy.

He was the son of a noble nobleman, and he had been knighted by his father. He had fought in the Battle of the Bastille, in which he lost his life. His father had said to him, "If you wish to become a knight, you must be able to do so. If you do not, then you will not become one. You will


In [25]:
#9.Implement a text generation script using GPT-2 with a custom temperature
#setting for diversity in output text

In [26]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel

def generate_text_with_temperature(prompt, temperature=1.0, max_length=100):
  
    # Load pre-trained GPT-2 model and tokenizer
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")

    # Encode the prompt text to get token IDs
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text with the specified temperature
    outputs = model.generate(
        inputs,
        max_length=max_length,
        temperature=temperature,
        num_return_sequences=1,
        no_repeat_ngram_size=2, 
        top_p=0.92,  
        top_k=50, 
        do_sample=True,  
        pad_token_id=tokenizer.eos_token_id  
    )

    # Decode the generated tokens to human-readable text
    generated_text = tokenizer.decode(outputs[0], skip_special_tokens=True)
    
    return generated_text

# Example usage
prompt = "In the future, AI will"
temperature = 0.8  
generated_text = generate_text_with_temperature(prompt, temperature, max_length=100)

print("Generated Text:\n", generated_text)


Generated Text:
 In the future, AI will also learn how to work with computers to find patterns in objects. That will help us in developing a better understanding of why human beings behave differently and why we have different emotions.

We will then start to learn more about the behavior of the human brain in everyday life. This will enable us to better understand how we are different and what we need to do to achieve what humans need. The future of human behavior will involve better tools and tools for developing better ways of


In [27]:
#10.Create a script to implement temperature sampling with GPT-2,
#experimenting with different values to generate creative text

In [28]:
from transformers import GPT2Tokenizer, GPT2LMHeadModel
import torch

def generate_text_with_temperature(prompt, temperature=1.0, max_length=100):
 
    tokenizer = GPT2Tokenizer.from_pretrained("gpt2")
    model = GPT2LMHeadModel.from_pretrained("gpt2")

    # Encode the input prompt
    inputs = tokenizer.encode(prompt, return_tensors="pt")

    # Generate text using the specified temperature
    outputs = model.generate(
        inputs,
        max_length=max_length,
        temperature=temperature,
        num_return_sequences=1,
        top_p=0.92,  
        top_k=50,   
        do_sample=True, 
        no_repeat_ngram_size=2,  
        pad_token_id=tokenizer.eos_token_id
    )

    # Decode and return the generated text
    return tokenizer.decode(outputs[0], skip_special_tokens=True)

# Experiment with different temperatures
def experiment_with_temperatures(prompt, temperatures, max_length=100):

    print(f"Prompt: {prompt}\n{'-'*50}")
    for temp in temperatures:
        print(f"Temperature: {temp}")
        generated_text = generate_text_with_temperature(prompt, temperature=temp, max_length=max_length)
        print(f"Generated Text:\n{generated_text}\n{'-'*50}")

# Example usage
if __name__ == "__main__":
    prompt = "The future of artificial intelligence"
    temperatures = [0.5, 1.0, 1.5]  
    max_length = 100

    experiment_with_temperatures(prompt, temperatures, max_length=max_length)


Prompt: The future of artificial intelligence
--------------------------------------------------
Temperature: 0.5
Generated Text:
The future of artificial intelligence is uncertain. The future is still a mystery.

The Future of Artificial Intelligence
. . .
, A. A., A, L., & G. Gómez-Bruno. (2015). The Future Of Artificial intelligence: A Critical Review. Retrieved from http://www.sciencedirect.com/science/article/pii/S00251439005908/full
 and http/pdf/Abstract/A_
--------------------------------------------------
Temperature: 1.0
Generated Text:
The future of artificial intelligence? Will it really make humanity as better as possible, and will it eventually win?

This answer could have a huge impact on how we think about life. But the best answers don't always lie within our immediate future, which is why we should have this conversation before we do our business.
/p/k/
- - -
[Note: The author's first name is Patrick.]
I know that this post will sound too complicated, but please
-----

In [29]:
#11.I How can you implement text generation using it in a simple custom
#attention-based architecture?

In [40]:
import tensorflow as tf
from tensorflow.keras.models import Model
from tensorflow.keras.layers import Input, Embedding, LSTM, Dense, Attention

# Sample data: input-output sentence pairs (toy dataset)
data = [
    ("the sky is blue", "el cielo es azul"),
    ("the sun is bright", "el sol es brillante"),
    ("the grass is green", "la hierba es verde"),
]

# Preprocess the data
input_texts = [pair[0] for pair in data]
output_texts = ["<start> " + pair[1] + " <end>" for pair in data]

# Tokenize the input and output texts
input_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
input_tokenizer.fit_on_texts(input_texts)
input_sequences = input_tokenizer.texts_to_sequences(input_texts)
input_vocab_size = len(input_tokenizer.word_index) + 1

output_tokenizer = tf.keras.preprocessing.text.Tokenizer(filters='')
output_tokenizer.fit_on_texts(output_texts)
output_sequences = output_tokenizer.texts_to_sequences(output_texts)
output_vocab_size = len(output_tokenizer.word_index) + 1

# Pad sequences to ensure uniform length
max_input_len = max(len(seq) for seq in input_sequences)
max_output_len = max(len(seq) for seq in output_sequences)

input_sequences = tf.keras.preprocessing.sequence.pad_sequences(input_sequences, maxlen=max_input_len, padding='post')
output_sequences = tf.keras.preprocessing.sequence.pad_sequences(output_sequences, maxlen=max_output_len, padding='post')

# Prepare features and labels for training
encoder_input_data = input_sequences
decoder_input_data = output_sequences[:, :-1]
decoder_target_data = output_sequences[:, 1:]

# Build the Encoder-Decoder Model with Attention
latent_dim = 256

# Encoder
encoder_inputs = Input(shape=(max_input_len,))
encoder_embedding = Embedding(input_vocab_size, latent_dim)(encoder_inputs)
encoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
encoder_outputs, state_h, state_c = encoder_lstm(encoder_embedding)

# Decoder
decoder_inputs = Input(shape=(max_output_len - 1,))
decoder_embedding = Embedding(output_vocab_size, latent_dim)(decoder_inputs)
decoder_lstm = LSTM(latent_dim, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(decoder_embedding, initial_state=[state_h, state_c])

# Attention mechanism
attention = Attention()
context_vector = attention([decoder_outputs, encoder_outputs])
from tensorflow.keras.layers import Concatenate

decoder_combined_context = Concatenate(axis=-1)([decoder_outputs, context_vector])


# Dense output layer
decoder_dense = Dense(output_vocab_size, activation='softmax')
decoder_outputs = decoder_dense(decoder_combined_context)

# Define the model
model = Model([encoder_inputs, decoder_inputs], decoder_outputs)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

