# Recurrent GAN (RGAN) for Sequential Data Generation

This notebook demonstrates the implementation of a Recurrent GAN (RGAN) to generate sequential data using the Human Activity Recognition (HAR) dataset.

## 1. Dataset Preparation

We use the HAR dataset, a public time-series dataset containing sensor data from wearable devices. The data is normalized and reshaped to fit the input format required by the RGAN.

In [11]:
import tensorflow as tf
import numpy as np
import zipfile
import os

# Function to extract, load, and preprocess the HAR dataset
def load_har_dataset(zip_path):
    # Specify the directory to extract the dataset
    extract_dir = "data/UCI HAR Dataset/"
    
    # Extract the ZIP file if not already extracted
    if not os.path.exists(extract_dir):
        with zipfile.ZipFile(zip_path, 'r') as zip_ref:
            zip_ref.extractall("data/")
        print("Dataset extracted successfully.")
    else:
        print("Dataset already extracted.")
    
    # Load training data: Each row corresponds to a sample with sequential features
    X_train = np.loadtxt(extract_dir + "train/X_train.txt")
    
    # Print the shape before reshaping
    print(f"Shape of X_train before reshaping: {X_train.shape}")
    
    # Normalize the data to the range [0, 1] for better training performance
    X_train = X_train / np.max(X_train, axis=0)
    
    # Calculate the total number of rows that can form sequences of 128 steps
    valid_rows = (X_train.shape[0] // 128) * 128
    
    # Truncate the data to ensure divisibility by 128
    X_train = X_train[:valid_rows]
    
    # Reshape the truncated data into [samples, time steps, features]
    X_train = X_train.reshape((-1, 128, 9))
    
    return X_train

# Path to your ZIP file
zip_path = "data/UCI HAR Dataset.zip"

# Call the function to load and preprocess the data
data = load_har_dataset(zip_path)

# Create a TensorFlow dataset object for training
# Shuffle the data and create batches for training
batch_size = 32
dataset = tf.data.Dataset.from_tensor_slices(data).shuffle(1000).batch(batch_size)

# Print the shape of the dataset to confirm dimensions
print(f"Dataset shape after reshaping: {data.shape}")
# Dataset shape: (Number of samples, Time steps, Features)


Dataset already extracted.


Shape of X_train before reshaping: (7352, 561)
Dataset shape after reshaping: (3553, 128, 9)


## 2. Define the Generator

The generator is an LSTM-based model that takes random noise as input and generates synthetic sequences matching the characteristics of the real data.

In [12]:
# Define the generator model for RGAN
class RGANGenerator(tf.keras.Model):
    def __init__(self, noise_dim, hidden_dim, seq_len, features):
        super(RGANGenerator, self).__init__()
        
        # LSTM layer to process the input noise
        self.rnn = tf.keras.layers.LSTM(hidden_dim, return_sequences=True)
        
        # Dense layer to map the RNN output to the required number of features
        self.fc = tf.keras.layers.Dense(features)
        
        # Store the sequence length and noise dimensions for future reference
        self.seq_len = seq_len
        self.noise_dim = noise_dim

    def call(self, z):
        # Forward pass through the LSTM layer
        rnn_out = self.rnn(z)  # Shape: [batch_size, seq_len, hidden_dim]
        
        # Map the output to the desired feature space
        return self.fc(rnn_out)  # Shape: [batch_size, seq_len, features]


## 3. Define the Discriminator

The discriminator is an LSTM-based model that distinguishes between real and synthetic sequences, providing feedback to improve the generator.

In [13]:
# Define the discriminator model for RGAN
class RGANDiscriminator(tf.keras.Model):
    def __init__(self, hidden_dim):
        super(RGANDiscriminator, self).__init__()
        
        # LSTM layer to process the input sequences
        self.rnn = tf.keras.layers.LSTM(hidden_dim, return_sequences=False)
        
        # Dense layer to output a single probability (real or fake)
        self.fc = tf.keras.layers.Dense(1, activation='sigmoid')

    def call(self, x):
        # Forward pass through the LSTM layer
        rnn_out = self.rnn(x)  # Shape: [batch_size, hidden_dim]
        
        # Output a single probability score
        return self.fc(rnn_out)  # Shape: [batch_size, 1]


## 4. Define Loss Functions and Optimizers

Binary Cross-Entropy loss is used for both the generator and discriminator, and Adam optimizers are used to update the model weights.

In [14]:
# Define the binary cross-entropy loss
# This is used for both the generator and discriminator
bce_loss = tf.keras.losses.BinaryCrossentropy()

# Define optimizers for both models
generator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)
discriminator_optimizer = tf.keras.optimizers.Adam(learning_rate=0.001)


## 5. Define the Training Step

Each training step involves generating synthetic sequences, calculating losses for both models, and updating their weights using gradients.

In [20]:
@tf.function  # Optimize execution by compiling the function
def train_step(real_sequences):
    # Get the batch size dynamically
    batch_size = tf.shape(real_sequences)[0]

    # Generate random noise input for the generator
    noise = tf.random.normal([batch_size, seq_len, noise_dim])

    # Use GradientTape to track operations for automatic differentiation
    with tf.GradientTape(persistent=True) as tape:
        # Generate fake sequences from noise
        fake_sequences = generator(noise)

        # Get discriminator predictions for real and fake sequences
        real_output = discriminator(real_sequences)  # Real sequence predictions
        fake_output = discriminator(fake_sequences)  # Fake sequence predictions

        # Compute discriminator loss
        d_loss_real = bce_loss(tf.ones_like(real_output), real_output)
        d_loss_fake = bce_loss(tf.zeros_like(fake_output), fake_output)
        d_loss = d_loss_real + d_loss_fake

        # Compute generator loss
        g_loss = bce_loss(tf.ones_like(fake_output), fake_output)

    # Compute gradients for both models
    generator_gradients = tape.gradient(g_loss, generator.trainable_variables)
    discriminator_gradients = tape.gradient(d_loss, discriminator.trainable_variables)

    # Apply gradients to update model weights
    generator_optimizer.apply_gradients(zip(generator_gradients, generator.trainable_variables))
    discriminator_optimizer.apply_gradients(zip(discriminator_gradients, discriminator.trainable_variables))

    return g_loss, d_loss


## 6. Train the RGAN

The RGAN is trained over multiple epochs, processing data in batches and displaying progress after each epoch.

In [21]:
# Training parameters
# Define the sequence length and noise dimensions
seq_len = 128  # The length of each sequence in the HAR dataset
noise_dim = 100  # Dimensionality of the noise vector for the generator
epochs = 50

# Loop through each epoch
for epoch in range(epochs):
    # Loop through batches of real sequences
    for real_sequences in dataset:
        # Perform a training step
        g_loss, d_loss = train_step(real_sequences)
    
    # Print progress at the end of each epoch
    print(f"Epoch {epoch + 1}, Generator Loss: {g_loss.numpy()}, Discriminator Loss: {d_loss.numpy()}")


NameError: in user code:

    File "C:\Users\Rishu\AppData\Local\Temp\ipykernel_21736\201821176.py", line 12, in train_step  *
        fake_sequences = generator(noise)

    NameError: name 'generator' is not defined


## 7. Generate and Visualize Sequences

After training, the generator creates synthetic sequences from random noise, and the results are visualized for evaluation.

In [18]:
# Generate synthetic sequences after training
noise = tf.random.normal([5, seq_len, noise_dim])  # Generate 5 sequences
generated_sequences = generator(noise).numpy()

# Plot the generated sequences
for i, seq in enumerate(generated_sequences[:3]):
    plt.figure(figsize=(10, 3))
    plt.title(f"Generated Sequence {i+1}")
    for feature_idx in range(features):
        plt.plot(seq[:, feature_idx], label=f"Feature {feature_idx+1}")
    plt.xlabel("Time Steps")
    plt.ylabel("Value")
    plt.legend()
    plt.show()


NameError: name 'seq_len' is not defined