In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical

text = """
Character-level language modeling is a text generation task where the model predicts the next character in a sequence of characters.
Recurrent Neural Networks (RNNs) are suitable for this task, as they can capture sequential dependencies.
"""

text = text.lower()
tokenizer = Tokenizer(char_level=True)
tokenizer.fit_on_texts(text)
total_chars = len(tokenizer.word_index) + 1

input_sequences = []
target_chars = []
seq_length = 100

for i in range(0, len(text) - seq_length, 1):
    input_seq = text[i:i + seq_length]
    target_seq = text[i + seq_length]
    input_sequences.append(input_seq)
    target_chars.append(target_seq)

char_to_index = tokenizer.word_index
X = np.array([[char_to_index[char] for char in seq] for seq in input_sequences])

y = to_categorical([char_to_index[char] for char in target_chars], num_classes=total_chars)

model = Sequential()
model.add(Embedding(total_chars, 128, input_length=seq_length))
model.add(LSTM(128))
model.add(Dense(total_chars, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam')

model.fit(X, y, epochs=50, batch_size=64)

seed_text = "character-level language modeling is"
generated_text = seed_text

for _ in range(200):
    x = np.array([[char_to_index[char] for char in seed_text]])
    x = pad_sequences(x, maxlen=seq_length, truncating='pre', value=char_to_index[' '])
    predicted_char_index = np.argmax(model.predict(x))
    predicted_char = [char for char, index in char_to_index.items() if index == predicted_char_index][0]
    generated_text += predicted_char
    seed_text = seed_text[1:] + predicted_char

print(generated_text)

Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50
Epoch 31/50
Epoch 32/50
Epoch 33/50
Epoch 34/50
Epoch 35/50
Epoch 36/50
Epoch 37/50
Epoch 38/50
Epoch 39/50
Epoch 40/50
Epoch 41/50
Epoch 42/50
Epoch 43/50
Epoch 44/50
Epoch 45/50
Epoch 46/50
Epoch 47/50
Epoch 48/50
Epoch 49/50
Epoch 50/50
character-level language modeling is a s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s a ca s


In [None]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from sklearn.model_selection import train_test_split


sentences = [
    "Apple Inc. is headquartered in Cupertino, California.",
    "John Smith works at Google in New York.",
    "Paris is a beautiful city in France."
]

labels = [
    "ORG ORG O O O LOC LOC O O",
    "PER PER O O ORG O LOC LOC O",
    "LOC O O O LOC LOC LOC O"
]

max_seq_length = 15
tokenizer = Tokenizer()
tokenizer.fit_on_texts(sentences)
vocab_size = len(tokenizer.word_index) + 1

X = tokenizer.texts_to_sequences(sentences)
X = pad_sequences(X, maxlen=max_seq_length, padding='post')

label_tokenizer = Tokenizer(filters=' ')
label_tokenizer.fit_on_texts(labels)
num_classes = len(label_tokenizer.word_index) + 1

y = label_tokenizer.texts_to_sequences(labels)
y = pad_sequences(y, maxlen=max_seq_length, padding='post')

X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=128, input_length=max_seq_length))
model.add(LSTM(128, return_sequences=True))
model.add(Dense(num_classes, activation='softmax'))

model.compile(loss='categorical_crossentropy', optimizer='adam', metrics=['accuracy'])

y_train = [tf.keras.utils.to_categorical(label, num_classes=num_classes) for label in y_train]

model.fit(X_train, np.array(y_train), epochs=10, batch_size=32)

y_test = [tf.keras.utils.to_categorical(label, num_classes=num_classes) for label in y_test]
loss, accuracy = model.evaluate(X_test, np.array(y_test))

print(f"Test loss: {loss:.4f}")
print(f"Test accuracy: {accuracy:.4f}")



Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test loss: 1.2747
Test accuracy: 0.4667
