In [8]:
import random
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score
from tensorflow.keras.preprocessing.text import Tokenizer
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Embedding, LSTM, Dense
import ipywidgets as widgets
from IPython.display import display, HTML

# Page-level styling plus the title banner shown above the interactive controls.
# NOTE(review): the base64 payload in the background URL ends in '...' and looks
# truncated, so the image presumably will not render -- confirm against the
# original asset.
_INTRO_MARKUP = """
    <style>
        body {
            background: url('data:image/jpeg;base64,/9j/4AAQSkZJRgABAQAAAQABAAD...') no-repeat center center fixed;
            background-size: cover;
            font-family: Arial, sans-serif;
            color: white;
        }
        .content {
            background: rgba(0, 0, 0, 0.7);
            padding: 20px;
            border-radius: 10px;
            max-width: 600px;
            margin: 50px auto;
            text-align: center;
        }
        .content h1 {
            color: crimson;
        }
        .content button {
            margin: 10px 0;
        }
    </style>
    <div class='content'>
        <h1>The Rogue Reviewer</h1>
        <h2>A Data Poisoning Attack Adventure</h2>
        <p>Can you detect and prevent data poisoning before it's too late?</p>
        <hr>
    </div>
"""
display(HTML(_INTRO_MARKUP))

# Toy stand-in for an IMDB-style corpus: six short reviews paired with a binary
# label (1 for the positive reviews, 0 for the negative ones).
_labelled_reviews = [
    ("Amazing movie with great acting!", 1),
    ("Horrible film, worst ever!", 0),
    ("Loved it, would watch again!", 1),
    ("Terrible experience, regret watching!", 0),
    ("A masterpiece, highly recommended!", 1),
    ("Awful script, waste of time!", 0),
]
data = {
    "review": [text for text, _ in _labelled_reviews],
    "sentiment": [label for _, label in _labelled_reviews],
}
df = pd.DataFrame(data)

# Hold out 30% of the rows for evaluation; the fixed seed keeps the split stable.
train_data, test_data = train_test_split(df, test_size=0.3, random_state=42)

# The vocabulary is learned from the training reviews only.
tokenizer = Tokenizer(num_words=5000)
tokenizer.fit_on_texts(train_data["review"])


def _encode(frame):
    """Return (padded token-id matrix, label array) for a review DataFrame."""
    token_ids = tokenizer.texts_to_sequences(frame["review"])
    features = pad_sequences(token_ids, maxlen=10)
    labels = np.array(frame["sentiment"])
    return features, labels


X_train, y_train = _encode(train_data)
X_test, y_test = _encode(test_data)

def poison_data():
    """Flip the sentiment label of one randomly chosen training row.

    Rebinds the module-level ``train_data`` (to a private copy with one label
    inverted) and ``y_train`` (rebuilt from the poisoned labels), then reports
    the change through the ``output_text`` widget.

    ``y_train`` must be rebuilt here: it was materialized from ``train_data``
    before any poisoning, so editing only the DataFrame would leave the
    training labels untouched and the attack would have no effect.
    """
    global train_data, y_train

    # Work on an independent copy so the assignment below never writes into a
    # slice of the original DataFrame (avoids SettingWithCopyWarning).
    train_data = train_data.copy()

    # Resolve the label column by name instead of relying on its position.
    label_col = train_data.columns.get_loc("sentiment")

    poisoned_rows = random.sample(range(len(train_data)), k=1)  # Inject one poisoned entry
    flipped = 1 - train_data.iloc[poisoned_rows, label_col].to_numpy()  # 0 <-> 1
    train_data.iloc[poisoned_rows, label_col] = flipped

    # Keep the label array consumed by training in sync with the poisoned frame.
    y_train = np.array(train_data["sentiment"])

    output_text.value = "⚠️ The dataset has been poisoned! Watch out!"

def train_model():
    """Train a small LSTM sentiment classifier and report its test accuracy.

    Reads the module-level ``X_train``/``y_train`` for fitting and
    ``X_test``/``y_test`` for evaluation, saves the fitted model to
    ``bi_lstm_model.h5`` and writes the accuracy to the ``output_text`` widget.
    """
    model = Sequential([
        # `input_length` is deprecated in Keras 3 and was only shape metadata;
        # the sequence length is inferred from the data passed to fit().
        Embedding(input_dim=5000, output_dim=32),
        LSTM(32, return_sequences=False),
        Dense(1, activation='sigmoid')
    ])

    model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
    model.fit(X_train, y_train, epochs=5, batch_size=2, verbose=1)

    # The single-unit sigmoid head yields one column per sample; threshold at
    # 0.5 and flatten so the predictions line up with the 1-D y_test labels.
    predictions = (model.predict(X_test) > 0.5).astype("int32").ravel()
    accuracy = accuracy_score(y_test, predictions)

    # NOTE(review): the file name says "bi_lstm" but the network above is a
    # unidirectional LSTM -- confirm which one is intended.
    model.save("bi_lstm_model.h5")  # Save the model
    output_text.value = f"✅ LSTM Model trained successfully! Accuracy: {accuracy:.2f}"

def open_colab():
    """Render an iframe pointing at Google Colab inside the `colab_iframe` widget."""
    colab_link = "https://colab.research.google.com/"
    iframe_markup = f"<iframe src='{colab_link}' width='100%' height='500px'></iframe>"
    colab_iframe.value = iframe_markup

# Output areas that the button callbacks write into.
output_text = widgets.HTML(value="")
colab_iframe = widgets.HTML(value="")


def _make_button(description, button_style, tooltip, action):
    """Create a button that invokes the zero-argument `action` when clicked."""
    button = widgets.Button(
        description=description,
        button_style=button_style,
        tooltip=tooltip,
    )
    # on_click passes the clicked button to its callback; the actions ignore it.
    button.on_click(lambda _clicked: action())
    return button


# Interactive controls, one per action.
btn_poison = _make_button(
    "💀 Inject Poisoned Data",
    'danger',
    'Modify dataset by injecting poisoned data',
    poison_data,
)
btn_train = _make_button(
    "🚀 Train Bi-LSTM Model",
    'success',
    'Train the sentiment analysis model',
    train_model,
)
btn_colab = _make_button(
    "🌐 Open Google Colab",
    'info',
    'Open Google Colab in a popup',
    open_colab,
)

# Stack the controls vertically, with the status line and iframe underneath.
_controls = [btn_poison, btn_train, btn_colab, output_text, colab_iframe]
display(widgets.VBox(_controls))


VBox(children=(Button(button_style='danger', description='💀 Inject Poisoned Data', style=ButtonStyle(), toolti…

Epoch 1/5




[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 33ms/step - accuracy: 0.5000 - loss: 0.6931
Epoch 2/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 32ms/step - accuracy: 0.5000 - loss: 0.6909
Epoch 3/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 31ms/step - accuracy: 0.5000 - loss: 0.6874
Epoch 4/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 34ms/step - accuracy: 1.0000 - loss: 0.6806
Epoch 5/5
[1m2/2[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 1.0000 - loss: 0.6787
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 238ms/step


