<a href="https://colab.research.google.com/github/Aathi005/Deep-Learning/blob/main/Exp_6_core.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [4]:
import nltk
from keras.models import Model
from keras.layers import Input, LSTM, Dense, Embedding
from keras.preprocessing.sequence import pad_sequences
import numpy as np

# Sample data
input_texts = ['I love NLP', 'He plays football']
target_texts = [['PRON', 'VERB', 'NOUN'], ['PRON', 'VERB', 'NOUN']]

# Tokenization (simplified for small dataset)
word_vocab = sorted(set(word for sent in input_texts for word in sent.split()))
tag_vocab = sorted(set(tag for tags in target_texts for tag in tags))
word2idx = {word: i + 1 for i, word in enumerate(word_vocab)}
tag2idx = {tag: i for i, tag in enumerate(tag_vocab)}

# Convert sequences to integers and pad
max_seq_length = max(len(sent.split()) for sent in input_texts)
encoder_input_data = np.array(
    pad_sequences(
        [[word2idx[word] for word in sent.split()] for sent in input_texts],
        maxlen=max_seq_length,
        padding='post',
    )
)
decoder_output_data = np.array(
    pad_sequences(
        [[tag2idx[tag] for tag in tags] for tags in target_texts],
        maxlen=max_seq_length,
        padding='post',
    )
)


# Model (simplified)
embedding_dim = 50  # You can adjust this
hidden_units = 64

# Encoder
encoder_inputs = Input(shape=(None,), name='encoder_inputs')
encoder_embedding = Embedding(
    input_dim=len(word_vocab) + 1, output_dim=embedding_dim
)(encoder_inputs)
encoder = LSTM(hidden_units, return_state=True)
encoder_outputs, state_h, state_c = encoder(encoder_embedding)
encoder_states = [state_h, state_c]

# Decoder
decoder_inputs = Input(shape=(None,), name='decoder_inputs')
decoder_embedding = Embedding(
    input_dim=len(tag_vocab), output_dim=embedding_dim
)(decoder_inputs)
decoder_lstm = LSTM(hidden_units, return_sequences=True, return_state=True)
decoder_outputs, _, _ = decoder_lstm(
    decoder_embedding, initial_state=encoder_states
)
decoder_dense = Dense(len(tag_vocab), activation='softmax')
decoder_outputs = decoder_dense(decoder_outputs)

model = Model([encoder_inputs, decoder_inputs], decoder_outputs)

model.compile(
    optimizer='adam',
    loss='sparse_categorical_crossentropy',
    metrics=['accuracy'],
)

In [5]:
# Train the model
history = model.fit(
    [encoder_input_data, decoder_output_data],
    decoder_output_data,
    batch_size=1,
    epochs=100,
    validation_split=0.2,
)

Epoch 1/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 4s/step - accuracy: 0.3333 - loss: 1.0979 - val_accuracy: 0.3333 - val_loss: 1.0948
Epoch 2/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 389ms/step - accuracy: 1.0000 - loss: 1.0935 - val_accuracy: 0.6667 - val_loss: 1.0911
Epoch 3/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 146ms/step - accuracy: 1.0000 - loss: 1.0891 - val_accuracy: 0.6667 - val_loss: 1.0873
Epoch 4/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 191ms/step - accuracy: 1.0000 - loss: 1.0847 - val_accuracy: 0.6667 - val_loss: 1.0835
Epoch 5/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 172ms/step - accuracy: 1.0000 - loss: 1.0801 - val_accuracy: 1.0000 - val_loss: 1.0796
Epoch 6/100
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 211ms/step - accuracy: 1.0000 - loss: 1.0754 - val_accuracy: 1.0000 - val_loss: 1.0756
Epoch 7/100
[1m1/1[0m [32m━━━━━━━━

In [6]:
# Evaluate the model
loss, accuracy = model.evaluate([encoder_input_data, decoder_output_data], decoder_output_data)
print(f"Loss: {loss}")
print(f"Accuracy: {accuracy}")

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 800ms/step - accuracy: 1.0000 - loss: 0.1154
Loss: 0.11537280678749084
Accuracy: 1.0
