# NLP with Deep Learning Neural Networks

Import libraries

In [1]:
import numpy as np
import pandas as pd
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.text import Tokenizer 
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Sentiment analysis

In [3]:
import json

sentences = []
labels = []
urls = []

with open("Sarcasm_Headlines_Dataset.json", 'r') as f:
    for line in f:
        item = json.loads(line)
        sentences.append(item['headline'])
        labels.append(item['is_sarcastic'])
        urls.append(item['article_link'])

print("Sentences:", sentences[:5])
print('\n')
print("Labels:", labels[:5])
print('\n')
print("URLs:", urls[:5])


Sentences: ["former versace store clerk sues over secret 'black code' for minority shoppers", "the 'roseanne' revival catches up to our thorny political mood, for better and worse", "mom starting to fear son's web series closest thing she will have to grandchild", 'boehner just wants wife to listen, not come up with alternative debt-reduction ideas', 'j.k. rowling wishes snape happy birthday in the most magical way']


Labels: [0, 0, 1, 1, 0]


URLs: ['https://www.huffingtonpost.com/entry/versace-black-code_us_5861fbefe4b0de3a08f600d5', 'https://www.huffingtonpost.com/entry/roseanne-revival-review_us_5ab3a497e4b054d118e04365', 'https://local.theonion.com/mom-starting-to-fear-son-s-web-series-closest-thing-she-1819576697', 'https://politics.theonion.com/boehner-just-wants-wife-to-listen-not-come-up-with-alt-1819574302', 'https://www.huffingtonpost.com/entry/jk-rowling-wishes-snape-happy-birthday_us_569117c4e4b0cad15e64fdcb']


### Data Preprocessing (Tokenize and get word index, Create Sequences and Padding)

In [5]:
# splitting the data so the neural network doesn't see the test data
training_size = 20000
training_sentences = sentences[0:training_size]
testing_sentences = sentences[training_size:]
training_labels = labels[0:training_size]
testing_labels = labels[training_size:]

In [6]:
# Assuming these are our variables
vocab_size = 27000  # Adjust according to your vocabulary size
embedding_dim = 100
maxlen = 100  # Adjust according to your maximum sequence length

In [7]:
# Preprocessing
tokenizer = Tokenizer(num_words=vocab_size, oov_token="<00V>")
tokenizer.fit_on_texts(training_sentences)

word_index = tokenizer.word_index

#Splitting and padding the training set
training_sequences = tokenizer.texts_to_sequences(training_sentences)
training_padded = pad_sequences(training_sequences, padding='post')

#Splitting and padding the test set
testing_sequences = tokenizer.texts_to_sequences(testing_sentences)
testing_padded = pad_sequences(testing_sequences, padding='post')

In [8]:
# Convert to array
training_padded = np.array(training_padded)
training_labels = np.array(training_labels)
testing_padded = np.array(testing_padded)
testing_labels = np.array(testing_labels)

In [9]:
training_padded.shape

(20000, 40)

In [10]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dropout, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam

# Define and compile the improved model
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))

# Compile the model with a lower learning rate
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Implement early stopping
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)

# Train the model with early stopping
history = model.fit(training_padded, training_labels, epochs=30, validation_data=(testing_padded, testing_labels), verbose=2, callbacks=[early_stopping])




Epoch 1/30
625/625 - 29s - 46ms/step - accuracy: 0.7780 - loss: 0.4538 - val_accuracy: 0.8495 - val_loss: 0.3487
Epoch 2/30
625/625 - 18s - 29ms/step - accuracy: 0.9280 - loss: 0.1890 - val_accuracy: 0.8419 - val_loss: 0.3788
Epoch 3/30
625/625 - 17s - 28ms/step - accuracy: 0.9820 - loss: 0.0547 - val_accuracy: 0.8399 - val_loss: 0.5249
Epoch 4/30
625/625 - 18s - 29ms/step - accuracy: 0.9922 - loss: 0.0226 - val_accuracy: 0.8414 - val_loss: 0.7135
Epoch 5/30
625/625 - 17s - 28ms/step - accuracy: 0.9948 - loss: 0.0157 - val_accuracy: 0.8454 - val_loss: 0.7455
Epoch 6/30
625/625 - 18s - 29ms/step - accuracy: 0.9967 - loss: 0.0098 - val_accuracy: 0.8351 - val_loss: 0.9253


# Model Evaluation



### Model Evaluation: Conv1D with Global Max Pooling, Dropout, and Early StoppiMy
This model utilizes a Conv1D architecture with Global Max Pooling and Dropout layers, combined with early stopping to prevent overfitting. The model was trained for 6 epochs out of the planned 30 due to early stopping, which halted the training when the validation loss failed to improve.

### Breakdown of Your Code

1. **Importing Libraries:**
```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Flatten, Dense, Dropout, Conv1D, GlobalMaxPooling1D
from tensorflow.keras.callbacks import EarlyStopping
from tensorflow.keras.optimizers import Adam
```

2. **Defining the Model:**
```python
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))
model.add(Conv1D(filters=128, kernel_size=5, activation='relu'))
model.add(GlobalMaxPooling1D())
model.add(Dropout(0.5))
model.add(Dense(128, activation='relu'))
model.add(Dropout(0.5))
model.add(Dense(1, activation='sigmoid'))
```

- **Sequential()**: Initializes the model as a linear stack of layers.
- **Embedding**: Converts integer indices into dense vectors of fixed size (`embedding_dim`). This layer helps capture the semantic meaning of words.
- **Conv1D(filters=128, kernel_size=5, activation='relu')**: A 1D convolutional layer with 128 filters and a kernel size of 5, applying the ReLU activation function. This layer detects local features in the sequence data.
- **GlobalMaxPooling1D()**: Reduces the dimensionality by taking the maximum value across the time steps, allowing the model to focus on the most prominent features detected by the convolutional layers.
- **Dropout(0.5)**: Randomly sets 50% of the input units to 0 during training, which helps prevent overfitting.
- **Dense(128, activation='relu')**: A fully connected layer with 128 units and ReLU activation to learn complex patterns in the data.
- **Dense (Output Layer)**: A fully connected layer with 1 unit and sigmoid activation, outputting a probability score between 0 and 1 for binary classification.

3. **Compiling the Model:**
```python
optimizer = Adam(learning_rate=0.001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
```

- **optimizer='Adam'**: Adam optimizer with a learning rate of 0.001, which adaptively adjusts learning rates for different parameters.
- **loss='binary_crossentropy'**: This loss function is used for binary classification tasks.
- **metrics=['accuracy']**: Accuracy is monitored during training and validation.

4. **Early Stopping Callback:**
```python
early_stopping = EarlyStopping(monitor='val_loss', patience=5, restore_best_weights=True)
```

- **EarlyStopping**: Monitors the validation loss and stops training if it doesn't improve for 5 consecutive epochs. The best weights are restored after stopping.

5. **Training the Model:**
```python
history = model.fit(training_padded, training_labels, epochs=30, validation_data=(testing_padded, testing_labels), verbose=2, callbacks=[early_stopping])
```

- **fit**: Trains the model for a maximum of 30 epochs, but early stopping may terminate training early.
- **validation_data**: The model's performance is evaluated on the validation data at the end of each epoch.
- **callbacks**: Early stopping is used to monitor and potentially interrupt the training process early.

### Explanation of Results

1. **Epoch 1/30:**
   - **accuracy: 0.7780** and **loss: 0.4538**: The model starts with 77.80% accuracy on the training data and a loss of 0.4538.
   - **val_accuracy: 0.8495** and **val_loss: 0.3487**: The validation performance is better, with an accuracy of 84.95% and a loss of 0.3487, suggesting that the model is learning effectively.

2. **Epoch 2/30:**
   - **accuracy: 0.9280** and **loss: 0.1890**: The training accuracy increases significantly to 92.80%, and the loss decreases, indicating that the model is effectively learning patterns in the training data.
   - **val_accuracy: 0.8419** and **val_loss: 0.3788**: Vd the validation loss increases, suggesting the beginning of overfitting.

3. **Epoch 3/30:**
   - **accuracy: 0.9820** and **loss: 0.0547*er, reaching 98.20%, with a low loss, but this is indicative of overfitting.
   - **val_accuracy: 0.8399** and **val_loss: 0.5249**: The validation loss increases, confirming that the model is overfitting to the training data.

4. **Epoch 4/30:**
   - **accuracy: 0.9922** and **loss: 0.0226**: The model continues to perform exceptionally well on the training data with very low loss, further suggesting overfitting.
   - **val_accurarease, indicating that the model is not generalizing well to the validation data.

5. **Epoch 5/30:**
   - **accuracy: 0.9948** and **loss: 0.0157**: The model's performance on the training data is nearly perfect, which is a clear sign of overfitting.
   - **val_accuracy: 0.8454** and **val_loss: 0.7455**:  validation accuracy, but the loss continues to rise, further confirming overfitting.

6. **Epoch 6/30:**
   - **accuracy: 0.9967** and **loss: 0.0098**: The model's training accuracy is almost 100%, with minimal loss.
   - **val_accuracy: 0.8351** and **va e dropout rates, reducing the model's complexity, or increasing the amount of training data.

**Testing on new sentence**

In [14]:
test_sentences = [
    "Oh great, another Monday morning meeting. Can't wait.",  # Sarcastic
    "I had a wonderful time at the park today with my friends."  # Not sarcastic
]

In [15]:
new_sequences = tokenizer.texts_to_sequences(test_sentences)

In [16]:
padded = pad_sequences(new_sequences, maxlen = maxlen,padding='post',truncating='post')

In [17]:
print(model.predict(padded))

[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 204ms/step
[[0.84157  ]
 [0.1560598]]


**Comparing with test data**

In [19]:
predictions = model.predict(testing_padded)

[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 3ms/step


In [20]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example sigmoid outputs (probabilities) from your model
sigmoid_outputs = predictions

# Convert probabilities to binary predictions using a threshold of 0.5
threshold = 0.5
binary_predictions = (sigmoid_outputs > threshold).astype(int)

# Example test labels
test_labels = testing_labels

# Evaluate the predictions
accuracy = accuracy_score(test_labels, binary_predictions)
precision = precision_score(test_labels, binary_predictions)
recall = recall_score(test_labels, binary_predictions)
f1 = f1_score(test_labels, binary_predictions)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 0.8494559546877329
Precision: 0.8312629399585921
Recall: 0.822184300341297
F1 Score: 0.8266986959505833


# Let's try using an LSTM layer (Long Short Term Memory)

In [22]:
import numpy as np
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.regularizers import l2
from tensorflow.keras.optimizers import Adam

# Assuming these are your variables
vocab_size = 27000  # Adjust according to your vocabulary size
embedding_dim = 100
maxlen = 100  # Adjust according to your maximum sequence length

# Create and pad sequences
training_padded = pad_sequences(training_sequences, maxlen=maxlen, padding='post', truncating='post')
testing_padded = pad_sequences(testing_sequences, maxlen=maxlen, padding='post', truncating='post')

# Convert lists to numpy arrays
training_padded = np.array(training_padded)
testing_padded = np.array(testing_padded)
training_labels = np.array(training_labels)
testing_labels = np.array(testing_labels)

In [56]:
# Define the model
model = Sequential()

# Embedding layer
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))

# Bidirectional LSTM layers with L2 regularization
model.add(Bidirectional(LSTM(32, return_sequences=True, kernel_regularizer=l2(0.01))))
model.add(BatchNormalization())
model.add(Dropout(0.6))

model.add(Bidirectional(LSTM(32, return_sequences=True, kernel_regularizer=l2(0.01))))
model.add(BatchNormalization())
model.add(Dropout(0.6))

model.add(Bidirectional(LSTM(32, kernel_regularizer=l2(0.01))))
model.add(BatchNormalization())
model.add(Dropout(0.6))

# Dense layer with L2 regularization
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.6))

# Output layer
model.add(Dense(1, activation='sigmoid'))

# Compile the model with a lower learning rate
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])

# Early stopping with a more conservative patience value
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)

# Reduce learning rate on plateau
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=0.00001)

# Learning rate scheduler
def scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(lr * tf.math.exp(-0.1))

lr_scheduler = LearningRateScheduler(scheduler)

# Train the model
history = model.fit(
    training_padded, 
    training_labels, 
    epochs=30, 
    validation_data=(testing_padded, testing_labels), 
    callbacks=[early_stopping, reduce_lr, lr_scheduler], 
    verbose=2
)




Epoch 1/30
625/625 - 147s - 235ms/step - accuracy: 0.5015 - loss: 6.2891 - val_accuracy: 0.5509 - val_loss: 5.1404 - learning_rate: 1.0000e-04
Epoch 2/30
625/625 - 90s - 144ms/step - accuracy: 0.5153 - loss: 4.9184 - val_accuracy: 0.5604 - val_loss: 4.2336 - learning_rate: 1.0000e-04
Epoch 3/30
625/625 - 95s - 152ms/step - accuracy: 0.5188 - loss: 4.1127 - val_accuracy: 0.5570 - val_loss: 3.6580 - learning_rate: 1.0000e-04
Epoch 4/30
625/625 - 95s - 151ms/step - accuracy: 0.5247 - loss: 3.5504 - val_accuracy: 0.6148 - val_loss: 3.1952 - learning_rate: 1.0000e-04
Epoch 5/30
625/625 - 93s - 149ms/step - accuracy: 0.5404 - loss: 3.0869 - val_accuracy: 0.6786 - val_loss: 2.7860 - learning_rate: 1.0000e-04
Epoch 6/30
625/625 - 90s - 144ms/step - accuracy: 0.5987 - loss: 2.6509 - val_accuracy: 0.7737 - val_loss: 2.3462 - learning_rate: 1.0000e-04
Epoch 7/30
625/625 - 91s - 146ms/step - accuracy: 0.7324 - loss: 2.1945 - val_accuracy: 0.8202 - val_loss: 1.9178 - learning_rate: 1.0000e-04
Epoch

# Evaluate your new model



### Model Evaluation: Bidirectional LSTM with Batch Normalization, Dropout, and Learning Rate Scheduling

This model employs a Bidirectional LSTM architecture with Batch Normalization, Dropout regularization, L2 regularization, and advanced learning rate scheduling. The model is designed to combat overfitting and improve generalization. The training lasted for 18 epochs out of the planned 30 due to the early stopping mechanism.

### Breakdown of Your Code

1. **Importing Libraries:**
```python
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, Bidirectional, LSTM, Dropout, Dense, BatchNormalization
from tensorflow.keras.callbacks import EarlyStopping, ReduceLROnPlateau, LearningRateScheduler
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.regularizers import l2
```

2. **Defining the Model:**
```python
model = Sequential()
model.add(Embedding(input_dim=vocab_size, output_dim=embedding_dim, input_length=maxlen))
model.add(Bidirectional(LSTM(32, return_sequences=True, kernel_regularizer=l2(0.01))))
model.add(BatchNormalization())
model.add(Dropout(0.6))
model.add(Bidirectional(LSTM(32, return_sequences=True, kernel_regularizer=l2(0.01))))
model.add(BatchNormalization())
model.add(Dropout(0.6))
model.add(Bidirectional(LSTM(32, kernel_regularizer=l2(0.01))))
model.add(BatchNormalization())
model.add(Dropout(0.6))
model.add(Dense(32, activation='relu', kernel_regularizer=l2(0.01)))
model.add(Dropout(0.6))
model.add(Dense(1, activation='sigmoid'))
```

- **Sequential()**: Initializes a linear stack of layers.
- **Embedding**: Converts integer indices into dense vectors of a fixed size (`embedding_dim`), capturing the semantic meaning of words.
- **Bidirectional(LSTM(32, return_sequences=True, kernel_regularizer=l2(0.01)))**: LSTM layers that process input sequences bidirectionally, with L2 regularization to penalize large weights.
- **BatchNormalization()**: Normalizes the output of the LSTM layers to stabilize learning.
- **Dropout(0.6)**: Randomly sets 60% of the input units to 0, helping to prevent overfitting.
- **Dense(32, activation='relu', kernel_regularizer=l2(0.01))**: A fully connected layer with ReLU activation and L2 regularization to learn complex patterns.
- **Dense (Output Layer)**: A fully connected layer with 1 unit and sigmoid activation for binary classification.

3. **Compiling the Model:**
```python
optimizer = Adam(learning_rate=0.0001)
model.compile(optimizer=optimizer, loss='binary_crossentropy', metrics=['accuracy'])
```

- **optimizer='Adam'**: An adaptive learning rate optimizer that adjusts learning rates for different parameters.
- **loss='binary_crossentropy'**: A loss function for binary classification.
- **metrics=['accuracy']**: Monitors accuracy during training and validation.

4. **Callbacks for Training:**
```python
early_stopping = EarlyStopping(monitor='val_loss', patience=2, restore_best_weights=True)
reduce_lr = ReduceLROnPlateau(monitor='val_loss', factor=0.5, patience=1, min_lr=0.00001)

def scheduler(epoch, lr):
    if epoch < 10:
        return lr
    else:
        return float(lr * tf.math.exp(-0.1))

lr_scheduler = LearningRateScheduler(scheduler)
```

- **EarlyStopping**: Stops training if the validation loss does not improve for 2 consecutive epochs, restoring the best weights.
- **ReduceLROnPlateau**: Reduces the learning rate by a factor of 0.5 if the validation loss plateaus, with a minimum learning rate of 0.00001.
- **LearningRateScheduler**: Decreases the learning rate exponentially after 10 epochs to fine-tune the learning process.

5. **Training the Model:**
```python
history = model.fit(training_padded, training_labels, epochs=30, validation_data=(testing_padded, testing_labels), callbacks=[early_stopping, reduce_lr, lr_scheduler], verbose=2)
```

- **fit**: Trains the model for a maximum of 30 epochs, with early stopping potentially terminating the training early.
- **validation_data**: Evaluates the model’s performance on the validation set after each epoch.
- **callbacks**: Incorporates early stopping, learning rate reduction, and scheduling to optimize training.

### Explanation of Results

1. **Epoch 1/30:**
   - **accuracy: 0.5015** and **loss: 6.2891**: The model starts with 50.15% accuracy and a high loss, suggesting it's initially struggling to learn.
   - **val_accuracy: 0.5509** and **val_loss: 5.1404**: The validation metrics show slight improvement, indicating the model is beginning to learn.

2. **Epoch 6/30:**
   - **accuracy: 0.5987** and **loss: 2.6509**: The model’s performance improves, achieving 59.87% accuracy with a reduced loss, indicating it is learning more effectively.
   - **val_accuracy: 0.7737** and **val_loss: 2.3462**: Validation accuracy improves significantly, suggesting that the model is generalizing better.

3. **Epoch 10/30:**
   - **accuracy: 0.9405** and **loss: 1.1072**: The model achieves over 94% accuracy on training data, but this high accuracy suggests potential overfitting.
   - **val_accuracy: 0.8360** and **val_loss: 1.2438**: The validation loss starts to increase slightly, hinting at the beginning of overfitting.

4. **Epoch 18/30:**
   - **accuracy: 0.9945** and **loss: 0.2725**: The model reaches near-perfect accuracy on the training set, strongly indicating overfitting.
   - **val_accuracy: 0.8346** and **val_loss: 0.9490**: Va as reducing the model's complexity or increasing the dropout rate, could help improve generalization.hniques, more data, or tuning the model architecture to improve generalization.

**Comparing with test data**

In [60]:
predictions = model.predict(testing_padded)

[1m210/210[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m28s[0m 115ms/step


In [62]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example sigmoid outputs (probabilities) from your model
sigmoid_outputs = predictions

# Convert probabilities to binary predictions using a threshold of 0.5
threshold = 0.5
binary_predictions = (sigmoid_outputs > threshold).astype(int)

# Example test labels
test_labels = testing_labels

# Evaluate the predictions
accuracy = accuracy_score(test_labels, binary_predictions)
precision = precision_score(test_labels, binary_predictions)
recall = recall_score(test_labels, binary_predictions)
f1 = f1_score(test_labels, binary_predictions)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 0.834252496646296
Precision: 0.844579226686884
Recall: 0.7604095563139932
F1 Score: 0.8002873563218392


In [32]:
import numpy as np
from sklearn.metrics import accuracy_score, precision_score, recall_score, f1_score

# Example sigmoid outputs (probabilities) from your model
sigmoid_outputs = predictions

# Convert probabilities to binary predictions using a threshold of 0.5
threshold = 0.5
binary_predictions = (sigmoid_outputs > threshold).astype(int)

# Example test labels
test_labels = testing_labels

# Evaluate the predictions
accuracy = accuracy_score(test_labels, binary_predictions)
precision = precision_score(test_labels, binary_predictions)
recall = recall_score(test_labels, binary_predictions)
f1 = f1_score(test_labels, binary_predictions)

print(f"Accuracy: {accuracy}")
print(f"Precision: {precision}")
print(f"Recall: {recall}")
print(f"F1 Score: {f1}")

Accuracy: 0.8639141451781189
Precision: 0.859536541889483
Recall: 0.8228668941979522
F1 Score: 0.8408020924149955


# Building a model that can write poetry

In [34]:
poem = """
In twilight's gentle, fleeting grace,
A wanderer roams the open space,
His steps a whisper on the ground,
Where shadows play and dreams are found.

With every stride, the earth's embrace,
Turns ancient tales in twilight's face,
A saddle tight, his spirit free,
He seeks the stars, the boundless sea.

Yet as he wanders, time stands still,
The night reveals its subtle thrill,
He watches skies with eyes of fire,
For constellations, dreams aspire.

In moonlit silence, tales unwind,
Of distant lands and love entwined,
His journey ends where starlight gleams,
In echoes of forgotten dreams.
"""

In [173]:
poem = poem.lower().split('/n')

In [175]:
tokenizer.fit_on_texts(poem)

In [176]:
total_words = len(tokenizer.word_index) + 1

In [179]:
total_words

25645

In [180]:
input_sequences = []
for line in poem:
    token_list = tokenizer.texts_to_sequences([line])[0]
    for i in range(1, len(token_list)):
        n_gram_sequence = token_list[:i+1]
        input_sequences.append(n_gram_sequence)

In [181]:
input_sequences

[[5, 6847],
 [5, 6847, 5122],
 [5, 6847, 5122, 8650],
 [5, 6847, 5122, 8650, 3346],
 [5, 6847, 5122, 8650, 3346, 7],
 [5, 6847, 5122, 8650, 3346, 7, 12785],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4, 363],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4, 363, 836],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4, 363, 836, 33],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4, 363, 836, 33, 974],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4, 363, 836, 33, 974, 7],
 [5, 6847, 5122, 8650, 3346, 7, 12785, 7934, 4, 363, 836, 33, 974, 7, 12786],
 [5,
  6847,
  5122,
  8650,
  3346,
  7,
  12785,
  7934,
  4,
  363,
  836,
  33,
  974,
  7,
  12786,
  8],
 [5,
  6847,
  5122,
  8650,
  3346,
  7,
  12785,
  7934,
  4,
  363,
  836,
  33,
  974,
  7,
  12786,
  8,
  4],
 [5,
  6847,
  5122,
  8650,
  3346,
  7,
  12785,
  7934,
  4,
  363,
  836,
  33,
  974,
  7,
  12786,
  8,
  4,
  1217],
 [5,

In [184]:
max_sequence_len = max([len(x) for x in input_sequences])

In [185]:
np.array(pad_sequences(input_sequences, maxlen = max_sequence_len, padding = 'pre'))

array([[   0,    0,    0, ...,    0,    5, 6847],
       [   0,    0,    0, ...,    5, 6847, 5122],
       [   0,    0,    0, ..., 6847, 5122, 8650],
       ...,
       [   0,    0,    5, ...,    5, 5780,    3],
       [   0,    5, 6847, ..., 5780,    3, 2015],
       [   5, 6847, 5122, ...,    3, 2015,  807]])

In [186]:
input_sequences = np.array(pad_sequences(input_sequences, maxlen = max_sequence_len, padding = 'pre'))

In [190]:
#split into features and labels (the last feature in evey row becomes a lable)
xs = input_sequences[:,:-1]
labels = input_sequences[:,-1:]

In [192]:
xs

array([[    0,     0,     0, ...,     0,     0,     5],
       [    0,     0,     0, ...,     0,     5,  6847],
       [    0,     0,     0, ...,     5,  6847,  5122],
       ...,
       [    0,     0,     5, ..., 12790,     5,  5780],
       [    0,     5,  6847, ...,     5,  5780,     3],
       [    5,  6847,  5122, ...,  5780,     3,  2015]])

In [194]:
labels

array([[ 6847],
       [ 5122],
       [ 8650],
       [ 3346],
       [    7],
       [12785],
       [ 7934],
       [    4],
       [  363],
       [  836],
       [   33],
       [  974],
       [    7],
       [12786],
       [    8],
       [    4],
       [ 1217],
       [  158],
       [ 5650],
       [  578],
       [    9],
       [  807],
       [   30],
       [  268],
       [   10],
       [  142],
       [ 6833],
       [    4],
       [ 3324],
       [ 3045],
       [  700],
       [ 1808],
       [ 2905],
       [    5],
       [ 6847],
       [  273],
       [    7],
       [ 8868],
       [ 5448],
       [   33],
       [ 2235],
       [  260],
       [   34],
       [ 2155],
       [    4],
       [  716],
       [    4],
       [ 8688],
       [ 1108],
       [  453],
       [   27],
       [   34],
       [ 8671],
       [   57],
       [ 2282],
       [   73],
       [    4],
       [  254],
       [  293],
       [  189],
       [ 6801],
       [ 3617],
       [

In [196]:
ys = tf.keras.utils.to_categorical(labels, num_classes=total_words)

In [211]:
from tensorflow.keras.optimizers import Adam

model = Sequential()

model.add(Embedding(total_words, 240, input_length=max_sequence_len-1))
model.add(Bidirectional(LSTM(150)))
model.add(Dense(total_words, activation='softmax'))
adam = Adam(learning_rate=0.01)

model.compile(loss='categorical_crossentropy', optimizer=adam, metrics=['accuracy'])

In [213]:
early_stopping = EarlyStopping(monitor='accuracy', patience=5, restore_best_weights=True)

history = model.fit(xs, ys, epochs=25, batch_size=32, verbose=1,  callbacks=[early_stopping])

Epoch 1/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 358ms/step - accuracy: 0.0236 - loss: 10.1108
Epoch 2/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 328ms/step - accuracy: 0.0589 - loss: 6.8573
Epoch 3/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 314ms/step - accuracy: 0.0222 - loss: 4.8044
Epoch 4/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 310ms/step - accuracy: 0.0053 - loss: 4.5723  
Epoch 5/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 316ms/step - accuracy: 0.0353 - loss: 4.5168
Epoch 6/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 313ms/step - accuracy: 0.0353 - loss: 4.4491
Epoch 7/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 326ms/step - accuracy: 0.0876 - loss: 4.2182
Epoch 8/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 335ms/step - accuracy: 0.0812 - loss: 4.1713
Epoch 9/25
[1m3/3[0m [32m━━━━━━━━━━━━━━━━━━━━[0m

In [214]:
seed_text = "In the hush of dawn, a wanderer"
next_words = 20

In [215]:
import numpy as np

for _ in range(next_words):
    token_list = tokenizer.texts_to_sequences([seed_text])[0]
    token_list = pad_sequences([token_list], maxlen=max_sequence_len-1, padding='pre')
    predicted_probs = model.predict(token_list, verbose=0)
    predicted_index = np.argmax(predicted_probs, axis=-1)[0]
    output_word = ""
    for word, index in tokenizer.word_index.items():
        if index == predicted_index:
            output_word = word
            break
    seed_text += " " + output_word
    print(seed_text)


In the hush of dawn, a wanderer roams
In the hush of dawn, a wanderer roams the
In the hush of dawn, a wanderer roams the ground
In the hush of dawn, a wanderer roams the ground where
In the hush of dawn, a wanderer roams the ground where starlight
In the hush of dawn, a wanderer roams the ground where starlight gleams
In the hush of dawn, a wanderer roams the ground where starlight gleams in
In the hush of dawn, a wanderer roams the ground where starlight gleams in twilight's
In the hush of dawn, a wanderer roams the ground where starlight gleams in twilight's face
In the hush of dawn, a wanderer roams the ground where starlight gleams in twilight's face a
In the hush of dawn, a wanderer roams the ground where starlight gleams in twilight's face a wanderer
In the hush of dawn, a wanderer roams the ground where starlight gleams in twilight's face a wanderer roams
In the hush of dawn, a wanderer roams the ground where starlight gleams in twilight's face a wanderer roams the
In the hush 