## Install Required Libraries

In [1]:
pip install textattack tensorflow pandas numpy scikit-learn nltk

Note: you may need to restart the kernel to use updated packages.


## Load and Preprocess Data

In [21]:
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
import pandas as pd
import nltk
from sklearn.model_selection import train_test_split
import tensorflow as tf
from tensorflow.keras.models import load_model
import numpy as np
import matplotlib.pyplot as plt
nltk.download('stopwords')

[nltk_data] Downloading package stopwords to
[nltk_data]     C:\Users\dell\AppData\Roaming\nltk_data...
[nltk_data]   Package stopwords is already up-to-date!


True

In [22]:
# Cell 2: Load and preprocess data (unchanged)
df = pd.read_csv("../data/training.1600000.processed.noemoticon.csv", encoding="latin-1")
df = df.iloc[:, [0, 5]]
df.columns = ['sentiment', 'text']
df.drop_duplicates(inplace=True)
df['sentiment'] = df['sentiment'].map({0: 'negatif', 4: 'positif'})
df['sentiment'] = df['sentiment'].map({'negatif': 0, 'positif': 1})  # Convert to binary labels
print(df.head())

   sentiment                                               text
0          0  is upset that he can't update his Facebook by ...
1          0  @Kenichan I dived many times for the ball. Man...
2          0    my whole body feels itchy and like its on fire 
3          0  @nationwideclass no, it's not behaving at all....
4          0                      @Kwesidei not the whole crew 


In [23]:
# Cell 3: Text preprocessing
max_words = 10000  # Maximum number of words in vocabulary
max_len = 100      # Maximum sequence length
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(df['text'])
sequences = tokenizer.texts_to_sequences(df['text'])
X = pad_sequences(sequences, maxlen=max_len)
y = df['sentiment'].values

In [24]:
# Split the data
X_train, X_test, y_train, y_test = train_test_split(X, y, test_size=0.2, random_state=42)

## Load my LSTM Model

In [25]:
from tensorflow.keras.models import load_model
model = load_model('../models/best_model.keras')

## Generate Adversarial Examples with TextAttack

In [26]:
# Cell 5: Adversarial Training with FGM
def fgm_perturbation(model, x, y, epsilon=0.1):
    """
    Generate adversarial examples using Fast Gradient Method (FGM).
    Perturbs the embeddings of the input sequences.
    """
    # Ensure model is in training mode
    model.trainable = True
    
    # Get the embedding layer
    embedding_layer = model.get_layer(index=0)
    
    # Create a model that outputs embeddings
    embedding_model = tf.keras.Model(inputs=model.input, 
                                   outputs=embedding_layer.output)
    
    # Get embeddings for the input
    embeddings = embedding_model(x)
    
    # Compute gradients of loss with respect to embeddings
    with tf.GradientTape() as tape:
        tape.watch(embeddings)
        preds = model(x)
        loss = tf.keras.losses.binary_crossentropy(y, preds)
    
    # Calculate gradients
    gradients = tape.gradient(loss, embeddings)
    
    # Normalize gradients (sign of gradients for FGM)
    gradients_sign = tf.sign(gradients)
    
    # Apply perturbation to embeddings
    adv_embeddings = embeddings + epsilon * gradients_sign
    
    # Create a model to convert perturbed embeddings back to predictions
    adv_model = tf.keras.Sequential([
        tf.keras.layers.Input(shape=(max_len, model.get_layer(index=0).output_dim)),
        *model.layers[1:]  # Include all layers after the embedding layer
    ])
    
    # Get predictions for adversarial examples
    adv_preds = adv_model(adv_embeddings)
    
    return adv_preds

In [27]:
# Custom fine-tuning loop with adversarial training
def fine_tune_with_adversarial(model, X_train, y_train, X_test, y_test, epochs=3, batch_size=32, epsilon=0.1):
    # Use a smaller learning rate for fine-tuning
    optimizer = tf.keras.optimizers.Adam(learning_rate=1e-5)
    model.compile(optimizer=optimizer, loss='binary_crossentropy', 
                  metrics=['accuracy', 'precision', 'recall'])
    
    history = {'accuracy': [], 'val_accuracy': [], 'loss': [], 'val_loss': []}
    
    for epoch in range(epochs):
        print(f'Fine-Tuning Epoch {epoch+1}/{epochs}')
        batch_losses = []
        batch_accuracies = []
        
        # Iterate over batches
        for i in range(0, len(X_train), batch_size):
            x_batch = X_train[i:i+batch_size]
            y_batch = y_train[i:i+batch_size]
            
            # Standard training step
            with tf.GradientTape() as tape:
                preds = model(x_batch, training=True)
                loss = tf.keras.losses.binary_crossentropy(y_batch, preds)
            
            gradients = tape.gradient(loss, model.trainable_variables)
            optimizer.apply_gradients(zip(gradients, model.trainable_variables))
            
            # Adversarial training step
            adv_preds = fgm_perturbation(model, x_batch, y_batch, epsilon)
            with tf.GradientTape() as tape:
                adv_loss = tf.keras.losses.binary_crossentropy(y_batch, adv_preds)
            
            adv_gradients = tape.gradient(adv_loss, model.trainable_variables)
            optimizer.apply_gradients(zip(adv_gradients, model.trainable_variables))
            
            # Compute metrics
            batch_losses.append(loss.numpy().mean())
            batch_accuracies.append(tf.keras.metrics.binary_accuracy(y_batch, preds).numpy().mean())
        
        # Validation step
        val_pred = model(X_test, training=False)
        val_loss = tf.keras.losses.binary_crossentropy(y_test, val_pred).numpy().mean()
        val_accuracy = tf.keras.metrics.binary_accuracy(y_test, val_pred).numpy().mean()
        
        # Log metrics
        history['loss'].append(np.mean(batch_losses))
        history['accuracy'].append(np.mean(batch_accuracies))
        history['val_loss'].append(val_loss)
        history['val_accuracy'].append(val_accuracy)
        
        print(f'Loss: {history["loss"][-1]:.4f}, Accuracy: {history["accuracy"][-1]:.4f}, '
              f'Val Loss: {val_loss:.4f}, Val Accuracy: {val_accuracy:.4f}')
    
    return history

In [28]:
# Cell 6: Fine-tune the model with adversarial training
history = fine_tune_with_adversarial(model, X_train, y_train, X_test, y_test, 
                                    epochs=3, batch_size=32, epsilon=0.1)

Fine-Tuning Epoch 1/3


ValueError: Arguments `target` and `output` must have the same rank (ndim). Received: target.shape=(32,), output.shape=(32, 2)

In [None]:
# Cell 7: Save the fine-tuned model
model.save('../models/best_model_adversarial.keras')

In [None]:
# Cell 8: Plot training history (unchanged)
plt.plot(history['accuracy'], label='Train Accuracy')
plt.plot(history['val_accuracy'], label='Validation Accuracy')
plt.plot(history['loss'], label='Train Loss')
plt.plot(history['val_loss'], label='Validation Loss')
plt.legend()
plt.show()

In [None]:
# Cell 9: Evaluate the fine-tuned model (unchanged)
results = model.evaluate(X_test, y_test, batch_size=32)
print(f"Test Loss: {results[0]:.4f}")
print(f"Test Accuracy: {results[1]:.4f}")
print(f"Test Precision: {results[2]:.4f}")
print(f"Test Recall: {results[3]:.4f}")

In [None]:
# Cell 10: Test robustness on adversarial examples
adv_test_preds = fgm_perturbation(model, X_test, y_test, epsilon=0.1)
adv_test_accuracy = tf.keras.metrics.binary_accuracy(y_test, adv_test_preds).numpy().mean()
print(f"Adversarial Test Accuracy: {adv_test_accuracy:.4f}")