# Recurrent Neural Networks

A Recurrent Neural Network (RNN) is a type of artificial neural network designed to work with sequential data, where the order of the inputs is crucial. Unlike traditional feedforward neural networks, RNNs have connections that form a directed cycle, allowing them to exhibit dynamic temporal behavior. This cyclic structure enables RNNs to maintain a memory of previous inputs, making them well-suited for tasks involving sequences, such as time series prediction, natural language processing (e.g., language modeling, machine translation), and speech recognition. RNNs process input sequences step by step, updating their internal state with each new input; that state in turn influences subsequent outputs. However, standard RNNs suffer from vanishing or exploding gradients when trained on long sequences, which makes long-range dependencies hard to learn. To address this, variants such as Long Short-Term Memory (LSTM) and the Gated Recurrent Unit (GRU) were developed, featuring specialized gating mechanisms that better control the flow of information and alleviate these problems. RNNs are available in frameworks such as TensorFlow and PyTorch, which offer powerful tools for training and deploying models that excel at handling sequential data.

In [None]:
import tensorflow as tf
from tensorflow.keras.datasets import imdb
from tensorflow.keras.preprocessing.sequence import pad_sequences

# Fetch the IMDb reviews, restricting the vocabulary with num_words=10000
(train_data, train_labels), (test_data, test_labels) = imdb.load_data(num_words=10000)

# Bring every review to a common length so the splits can be batched
maxlen = 200  # Maximum sequence length
train_data, test_data = (
    pad_sequences(split, maxlen=maxlen) for split in (train_data, test_data)
)

# Assemble the network one layer at a time:
# embedding -> simple recurrent layer -> single sigmoid unit
model = tf.keras.Sequential()
model.add(tf.keras.layers.Embedding(input_dim=10000, output_dim=32, input_length=maxlen))
model.add(tf.keras.layers.SimpleRNN(units=32))
model.add(tf.keras.layers.Dense(units=1, activation='sigmoid'))

# Binary sentiment labels -> binary cross-entropy, tracked with accuracy
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=['accuracy'],
)

# Print the layer table and parameter counts
model.summary()

# Fit on the training split, holding back 20% of it for validation
batch_size = 128
epochs = 5
model.fit(
    train_data,
    train_labels,
    validation_split=0.2,
    epochs=epochs,
    batch_size=batch_size,
)

# Score the trained model on the held-out test split
loss, accuracy = model.evaluate(test_data, test_labels)
print(f'Test Accuracy: {accuracy:.4f}')

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/imdb.npz
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 32)           320000    
                                                                 
 simple_rnn (SimpleRNN)      (None, 32)                2080      
                                                                 
 dense (Dense)               (None, 1)                 33        
                                                                 
Total params: 322113 (1.23 MB)
Trainable params: 322113 (1.23 MB)
Non-trainable params: 0 (0.00 Byte)
_________________________________________________________________
Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5
Test Accuracy: 0.8408


In [None]:
import tensorflow as tf
from tensorflow.keras.layers import Input, Embedding, SimpleRNN, GRU, LSTM, Bidirectional, Conv1D, GlobalMaxPooling1D, Dropout, BatchNormalization, Dense

# Define input shape and vocabulary size.
# NOTE: every name assigned in this section is module-level and is read by the
# statements further down, so the names and the order of layer creation are
# kept exactly as they are.
input_shape = (100,)  # Fixed sequence length of 100 token ids per example
vocab_size = 10000  # Passed to the Embedding layer as input_dim

# Input layer for fixed-length sequences of 100 steps (e.g., padded text sequences)
input_layer = Input(shape=input_shape)

# Embedding layer to convert integer-encoded tokens to dense 32-dimensional vectors
embedding_layer = Embedding(input_dim=vocab_size, output_dim=32)(input_layer)

# RNN layers
# Each recurrent layer below is applied to the SAME embedding output, so they
# form parallel branches rather than a stack. In the visible code only
# `lstm_layer` is used afterwards (by the attention layer); the other branches
# are illustrative and do not reach the model's output.
# SimpleRNN layer for basic sequence processing
# (return_sequences=True keeps one output per time step)
rnn_layer = SimpleRNN(units=64, return_sequences=True)(embedding_layer)

# GRU layer for more complex memory management than SimpleRNN
gru_layer = GRU(units=64, return_sequences=True)(embedding_layer)

# LSTM layer for handling long-term dependencies in sequences
lstm_layer = LSTM(units=64, return_sequences=True)(embedding_layer)

# Bidirectional LSTM layer
# Processes input sequences in both forward and backward directions
bidirectional_lstm = Bidirectional(LSTM(units=64, return_sequences=True))(embedding_layer)

# Convolutional layer followed by Global Max Pooling
# (this conv -> pool -> dropout -> batchnorm chain is likewise not connected
# to the model output in the visible code)
# Conv1D layer with 64 filters and kernel size 3 for learning local patterns
conv_layer = Conv1D(filters=64, kernel_size=3, activation='relu')(embedding_layer)
# GlobalMaxPooling1D layer collapses the time axis by keeping, for each filter,
# its maximum value over all time steps
pooling_layer = GlobalMaxPooling1D()(conv_layer)

# Dropout and BatchNormalization layers
# Dropout layer to randomly set input units to zero (rate=0.5) to prevent overfitting
dropout_layer = Dropout(rate=0.5)(pooling_layer)
# BatchNormalization layer to normalize activations, improving training stability
batchnorm_layer = BatchNormalization()(dropout_layer)

# Custom Attention layer (Bahdanau style)
class BahdanauAttention(tf.keras.layers.Layer):
    """Additive (Bahdanau-style) attention that pools a sequence into one vector.

    A score is computed for every step of `values` from a learned projection
    of the query plus a learned projection of that step; the scores are
    softmax-normalized along axis 1 and used to take a weighted sum of
    `values` along that same axis.

    Args:
        units: Width of the two hidden projections used for scoring.
        **kwargs: Standard Keras layer arguments (e.g. `name`, `dtype`),
            forwarded unchanged to `tf.keras.layers.Layer`.
    """

    def __init__(self, units, **kwargs):
        super().__init__(**kwargs)
        # Kept so the layer can be re-created from get_config().
        self.units = units
        self.W1 = Dense(units)  # projects the query
        self.W2 = Dense(units)  # projects each step of the values
        self.V = Dense(1)       # reduces the combined projection to one score per step

    def call(self, query, values):
        """Return the attention-weighted sum of `values`.

        Args:
            query: Tensor the scores are conditioned on. An axis is inserted
                at position 1 so it broadcasts against `values`
                (assumed shape (batch, features) - TODO confirm with callers).
            values: Sequence tensor to attend over; axis 1 is treated as the
                step axis (assumed shape (batch, steps, features)).

        Returns:
            `values` summed over axis 1 after weighting by the attention
            distribution.
        """
        # Bahdanau attention mechanism
        query_with_time_axis = tf.expand_dims(query, 1)
        score = self.V(tf.nn.tanh(self.W1(query_with_time_axis) + self.W2(values)))
        # Normalize across steps so the weights for each example sum to 1.
        attention_weights = tf.nn.softmax(score, axis=1)
        context_vector = attention_weights * values
        context_vector = tf.reduce_sum(context_vector, axis=1)
        return context_vector

    def get_config(self):
        """Return the layer config including `units` so the layer can be rebuilt."""
        config = super().get_config()
        config.update({'units': self.units})
        return config

# Apply Attention mechanism to the LSTM output
# BahdanauAttention.call expects two tensors, (query, values); calling the
# layer with the LSTM sequence alone fails because `values` is missing.
# The LSTM output at the last time step is its final hidden state, so it is
# used as the query, while the full per-step output is what gets attended over.
attention_query = lstm_layer[:, -1, :]
attention_layer = BahdanauAttention(units=64)(attention_query, lstm_layer)

# Dense output layer for classification (4 classes, softmax probabilities)
output_layer = Dense(units=4, activation='softmax')(attention_layer)

# Create a model with input and output layers
model = tf.keras.Model(inputs=input_layer, outputs=output_layer)

# Compile the model with Adam optimizer and sparse categorical crossentropy loss
# (sparse: labels are integer class ids rather than one-hot vectors)
model.compile(optimizer='adam', loss='sparse_categorical_crossentropy', metrics=['accuracy'])

# Display model summary with detailed layer information
model.summary()

In [None]:
import tensorflow as tf
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import numpy as np
import tensorflow_datasets as tfds
from sklearn.model_selection import train_test_split

# Load the AG News training split together with its dataset metadata
dataset, info = tfds.load('ag_news_subset', split='train', with_info=True)

# Walk the dataset once, collecting (decoded description, label) pairs
train_records = [
    (sample['description'].numpy().decode('utf-8'), sample['label'].numpy())
    for sample in dataset
]
texts = [text for text, _ in train_records]
labels = np.array([label for _, label in train_records])

# Fit the tokenizer on the training texts, then encode each text as a
# sequence of integer word ids
tokenizer = Tokenizer()
tokenizer.fit_on_texts(texts)
sequences = tokenizer.texts_to_sequences(texts)

# Bring every sequence to the same length; padding='post' appends the
# padding after the real tokens
maxlen = 200  # chosen maximum sequence length
padded_sequences = pad_sequences(sequences, padding='post', maxlen=maxlen)

# Define the RNN model
# The inputs are post-padded with zeros (padding='post'), so without a mask the
# SimpleRNN keeps updating its state over the trailing padding steps and its
# final state, which is all the classifier sees, is computed after them.
# mask_zero=True makes the Embedding emit a mask for id 0 so the recurrent
# layer skips padded steps. Id 0 is never a real word: the vocabulary size
# below is len(word_index) + 1 precisely because index 0 is reserved.
model = tf.keras.Sequential([
    tf.keras.layers.Embedding(
        input_dim=len(tokenizer.word_index) + 1,
        output_dim=128,
        input_length=maxlen,
        mask_zero=True,
    ),
    tf.keras.layers.SimpleRNN(64),  # Simple RNN layer
    tf.keras.layers.Dense(4, activation='softmax')  # Output layer with 4 units (one for each class)
])

# Compile the model
# (sparse loss: labels are integer class ids rather than one-hot vectors)
model.compile(optimizer='adam',
              loss='sparse_categorical_crossentropy',
              metrics=['accuracy'])

# Hold out 20% of the padded training data for validation (fixed seed)
X_train, X_val, y_train, y_val = train_test_split(
    padded_sequences,
    labels,
    test_size=0.2,
    random_state=42,
)

# Stop once validation loss has not improved for 3 epochs and roll the
# weights back to the best epoch seen
early_stopping_callback = tf.keras.callbacks.EarlyStopping(
    monitor='val_loss',
    patience=3,
    restore_best_weights=True,
)

# Fit for at most 10 epochs, validating on the held-out part after each one
history = model.fit(
    X_train,
    y_train,
    validation_data=(X_val, y_val),
    epochs=10,
    batch_size=32,
    callbacks=[early_stopping_callback],
)

# Load the test split and pull out its texts and labels in a single pass
test_dataset = tfds.load('ag_news_subset', split='test')
test_records = [
    (sample['description'].numpy().decode('utf-8'), sample['label'].numpy())
    for sample in test_dataset
]
test_texts = [text for text, _ in test_records]
test_labels = np.array([label for _, label in test_records])

# Encode the test texts with the tokenizer fitted on the training data and
# pad them the same way as the training sequences
test_sequences = tokenizer.texts_to_sequences(test_texts)
padded_test_sequences = pad_sequences(test_sequences, padding='post', maxlen=maxlen)

# Report accuracy on the test split
loss, accuracy = model.evaluate(padded_test_sequences, test_labels)
print(f'Test Accuracy: {accuracy * 100:.2f}%')

Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10
Epoch 6/10
Epoch 7/10
Epoch 8/10
Epoch 9/10
Epoch 10/10
Test Accuracy: 28.83%
