<a href="https://colab.research.google.com/github/Scaglione-Nick/ECGR4106/blob/main/Hw5_1.ipynb" target="_parent"><img src="https://colab.research.google.com/assets/colab-badge.svg" alt="Open In Colab"/></a>

In [None]:
import numpy as np

# Given text for next character prediction task
text = """
Next character prediction is a fundamental task in the field of natural language processing (NLP) that involves predicting the next character in a sequence of text based on the characters that precede it. This task is essential for various applications, including text auto-completion, spell checking, and even in the development of sophisticated AI models capable of generating human-like text.
At its core, next character prediction relies on statistical models or deep learning algorithms to analyze a given sequence of text and predict which character is most likely to follow. These predictions are based on patterns and relationships learned from large datasets of text during the training phase of the model.
One of the most popular approaches to next character prediction involves the use of Recurrent Neural Networks (RNNs), and more specifically, a variant called Long Short-Term Memory (LSTM) networks. RNNs are particularly well-suited for sequential data like text, as they can maintain information in 'memory' about previous characters to inform the prediction of the next character. LSTM networks enhance this capability by being able to remember long-term dependencies, making them even more effective for next character prediction tasks.
Training a model for next character prediction involves feeding it large amounts of text data, allowing it to learn the probability of each character's appearance following a sequence of characters. During this training process, the model adjusts its parameters to minimize the difference between its predictions and the actual outcomes, thus improving its predictive accuracy over time.
Once trained, the model can be used to predict the next character in a given piece of text by considering the sequence of characters that precede it. This can enhance user experience in text editing software, improve efficiency in coding environments with auto-completion features, and enable more natural interactions with AI-based chatbots and virtual assistants.
In summary, next character prediction plays a crucial role in enhancing the capabilities of various NLP applications, making text-based interactions more efficient, accurate, and human-like. Through the use of advanced machine learning models like RNNs and LSTMs, next character prediction continues to evolve, opening new possibilities for the future of text-based technology.
"""

# Vocabulary: every distinct character of the corpus, sorted so that the
# index assigned to each character is deterministic from run to run.
chars = sorted(set(text))
# Index -> character, then the inverse character -> index lookup.
idx_to_char = dict(enumerate(chars))
char_to_idx = {ch: idx for idx, ch in idx_to_char.items()}

# Prepare sequences for training with lengths 10, 20, and 30
def prepare_sequences(text, seq_length, char_map=None):
    """Build (input window, next character) training pairs from ``text``.

    A window of ``seq_length`` characters is slid over ``text`` one position
    at a time. Each window becomes one input row and the character directly
    after it becomes the matching target.

    Args:
        text: Source string to cut into windows.
        seq_length: Number of characters per input window; must be >= 1.
        char_map: Optional mapping from character to integer index. When
            omitted, the module-level ``char_to_idx`` is used.

    Returns:
        A ``(sequences, targets)`` tuple of integer NumPy arrays with shapes
        ``(n, seq_length)`` and ``(n,)``, where
        ``n = max(len(text) - seq_length, 0)``.

    Raises:
        ValueError: If ``seq_length`` is smaller than 1.
        KeyError: If a character used in a window or target is missing from
            the mapping.
    """
    if seq_length < 1:
        raise ValueError(f"seq_length must be at least 1, got {seq_length}")
    if char_map is None:
        char_map = char_to_idx

    n_samples = max(len(text) - seq_length, 0)
    # Encode the text once instead of re-encoding every overlapping window.
    # With no samples nothing is looked up, matching the per-window behavior.
    encoded = [char_map[c] for c in text] if n_samples else []

    # The explicit reshape keeps the (n, seq_length) layout even when n == 0;
    # a bare np.array([]) would collapse to shape (0,) with a float dtype.
    sequences = np.array(
        [encoded[i:i + seq_length] for i in range(n_samples)], dtype=int
    ).reshape(n_samples, seq_length)
    # The target of window i is the character at i + seq_length.
    targets = np.array(encoded[seq_length:], dtype=int)

    # Print shapes to debug
    print(f"Input shape (X_train): {sequences.shape}")
    print(f"Target shape (y_train): {targets.shape}")

    return sequences, targets


# One (inputs, targets) dataset per window size, keyed by that window size.
seq_lengths = [10, 20, 30]
data = dict((length, prepare_sequences(text, length)) for length in seq_lengths)



Input shape (X_train): (2378, 10)
Target shape (y_train): (2378,)
Input shape (X_train): (2368, 20)
Target shape (y_train): (2368,)
Input shape (X_train): (2358, 30)
Target shape (y_train): (2358,)


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Dense, Embedding, LayerNormalization, MultiHeadAttention
from tensorflow.keras.models import Model

# Define Transformer Model
def transformer_model(seq_length, vocab_size, embed_dim=64, num_heads=4, num_layers=2):
    """Build and compile a small Transformer for next-character prediction.

    Token embeddings are summed with learned position embeddings and passed
    through ``num_layers`` self-attention blocks, each wrapped in a residual
    connection followed by layer normalization. The representation at the
    final position is projected to a softmax over the vocabulary.

    Args:
        seq_length: Number of character indices in each input window.
        vocab_size: Number of distinct characters (size of the softmax output).
        embed_dim: Width of the token and position embeddings.
        num_heads: Number of attention heads in each block.
        num_layers: Number of stacked self-attention blocks.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.
    """
    # Imported locally so the helper layer below only relies on names that
    # are resolved inside this function; TensorFlow is already a dependency
    # of this file.
    import tensorflow as tf

    class PositionEmbedding(tf.keras.layers.Layer):
        """Add a learned embedding of each position index to its input.

        Self-attention by itself is order-invariant, so without this term
        the model cannot tell where in the window a character occurred.
        """

        def __init__(self, length, dim, **kwargs):
            super().__init__(**kwargs)
            self.length = length
            self.position_table = Embedding(length, dim)

        def call(self, token_embeddings):
            positions = tf.range(self.length)
            # (length, dim) broadcasts across the leading batch dimension.
            return token_embeddings + self.position_table(positions)

    inputs = Input(shape=(seq_length,))
    x = Embedding(vocab_size, embed_dim)(inputs)
    x = PositionEmbedding(seq_length, embed_dim)(x)
    for _ in range(num_layers):
        attention = MultiHeadAttention(num_heads=num_heads, key_dim=embed_dim)(x, x)
        # Residual connection: keep each token's own representation and add
        # the attended context to it, rather than replacing it outright.
        x = LayerNormalization()(x + attention)
    # Use the last token's output to predict the next character
    x = x[:, -1, :]  # Select the last token in the sequence
    outputs = Dense(vocab_size, activation='softmax')(x)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model
  # Define models for different sequence lengths
models = {}
seq_lengths = [10, 20, 30]  # window sizes to compare
vocab_size = len(chars)  # one output class per distinct character
# Build one independent, freshly compiled Transformer per window size.
for length in seq_lengths:
    models[length] = transformer_model(seq_length=length, vocab_size=vocab_size)



NameError: name 'chars' is not defined

In [None]:
from tensorflow.keras.layers import LSTM, SimpleRNN

def rnn_model(seq_length, vocab_size, embed_dim=64, rnn_type='LSTM', units=64):
    """Build and compile a recurrent model for next-character prediction.

    The input indices are embedded, fed through a single recurrent layer, and
    the layer's final output is projected to a softmax over the vocabulary.

    Args:
        seq_length: Number of character indices in each input window.
        vocab_size: Number of distinct characters (size of the softmax output).
        embed_dim: Width of the character embedding.
        rnn_type: ``'LSTM'`` for an LSTM layer or ``'RNN'`` for a SimpleRNN.
        units: Number of units in the recurrent layer.

    Returns:
        A Keras ``Model`` compiled with the Adam optimizer,
        sparse categorical cross-entropy loss and an accuracy metric.

    Raises:
        ValueError: If ``rnn_type`` is neither ``'LSTM'`` nor ``'RNN'``.
    """
    # Validate before building anything: an unrecognised type previously left
    # the recurrent output unbound and failed later with UnboundLocalError.
    if rnn_type not in ('LSTM', 'RNN'):
        raise ValueError(
            f"Unsupported rnn_type {rnn_type!r}; expected 'LSTM' or 'RNN'"
        )

    inputs = Input(shape=(seq_length,))
    embedding = Embedding(vocab_size, embed_dim)(inputs)
    recurrent_layer = LSTM(units) if rnn_type == 'LSTM' else SimpleRNN(units)
    x = recurrent_layer(embedding)
    # Only output the prediction for the next character
    outputs = Dense(vocab_size, activation='softmax')(x)
    model = Model(inputs, outputs)
    model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])
    return model
rnn_models = {}
seq_lengths = [10, 20, 30]  # window sizes to compare
vocab_size = len(chars)  # one output class per distinct character
# Build one independent recurrent model (default rnn_type) per window size.
for length in seq_lengths:
    rnn_models[length] = rnn_model(seq_length=length, vocab_size=vocab_size)


In [None]:
# Train the Transformer and RNN models for different sequence lengths
def train_model(model, data, seq_length, epochs=10, batch_size=64):
    """Fit ``model`` on one dataset and return the training history.

    Args:
        model: Object exposing a Keras-style ``fit`` method.
        data: ``(inputs, targets)`` pair to train on.
        seq_length: Window size the dataset was built with. Not used by the
            training call; kept so existing call sites keep working.
        epochs: Number of passes over the data.
        batch_size: Number of samples per gradient update.

    Returns:
        Whatever ``model.fit`` returns (for Keras models, a ``History``
        object holding the per-epoch metrics).
    """
    X_train, y_train = data
    # Return the result instead of discarding it so callers can inspect or
    # plot the per-epoch loss and accuracy.
    return model.fit(X_train, y_train, epochs=epochs, batch_size=batch_size)

# For every window size, train the Transformer first and then the RNN on the
# same dataset.
for length, data_set in data.items():
    train_model(model=models[length], data=data_set, seq_length=length)
    train_model(model=rnn_models[length], data=data_set, seq_length=length)


Epoch 1/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 0.2275 - loss: 2.7229
Epoch 2/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 58ms/step - accuracy: 0.2303 - loss: 2.6557
Epoch 3/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 59ms/step - accuracy: 0.2386 - loss: 2.6403
Epoch 4/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 61ms/step - accuracy: 0.2172 - loss: 2.6505
Epoch 5/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m4s[0m 95ms/step - accuracy: 0.2353 - loss: 2.6412
Epoch 6/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 53ms/step - accuracy: 0.2427 - loss: 2.6002
Epoch 7/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 62ms/step - accuracy: 0.2427 - loss: 2.6129
Epoch 8/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 61ms/step - accuracy: 0.2511 - loss: 2.5884
Epoch 9/10
[1m38/38[0m [32m━━━━━━━━━━━━━━━━━━

In [None]:
# Evaluate the models
def evaluate_model(model, data, seq_length):
    """Evaluate ``model`` on a dataset and return ``(loss, accuracy)``.

    Args:
        model: Object exposing an ``evaluate(inputs, targets)`` method that
            yields exactly two values: loss, then accuracy.
        data: ``(inputs, targets)`` pair to evaluate on.
        seq_length: Window size of the dataset; not used by the evaluation.

    Returns:
        A ``(loss, accuracy)`` tuple.
    """
    features, labels = data
    metrics = model.evaluate(features, labels)
    # Unpacking enforces that exactly two metrics came back.
    loss, accuracy = metrics
    return loss, accuracy

# Report loss and accuracy of both architectures for each window size.
# NOTE(review): data[length] is the same dataset the models were trained on,
# so these are training-set metrics, not held-out validation metrics.
for length in seq_lengths:
    transformer_loss, transformer_acc = evaluate_model(
        model=models[length], data=data[length], seq_length=length
    )
    rnn_loss, rnn_acc = evaluate_model(
        model=rnn_models[length], data=data[length], seq_length=length
    )
    print(f"Sequence Length {length} - Transformer: Loss={transformer_loss}, Accuracy={transformer_acc}")
    print(f"Sequence Length {length} - RNN: Loss={rnn_loss}, Accuracy={rnn_acc}")

[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 10ms/step - accuracy: 0.2893 - loss: 2.4333
[1m75/75[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3394 - loss: 2.3482
Sequence Length 10 - Transformer: Loss=2.473850965499878, Accuracy=0.27964675426483154
Sequence Length 10 - RNN: Loss=2.3763086795806885, Accuracy=0.33137089014053345
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.1894 - loss: 2.8115
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 6ms/step - accuracy: 0.3257 - loss: 2.4193
Sequence Length 20 - Transformer: Loss=2.8271613121032715, Accuracy=0.18496622145175934
Sequence Length 20 - RNN: Loss=2.4349310398101807, Accuracy=0.31967905163764954
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 21ms/step - accuracy: 0.1564 - loss: 2.9501
[1m74/74[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 7ms/step - accuracy: 0.3153 - loss: 2.3970
Sequence Length 30 -