# Implementation of the deployment of Stacker6X and test with a simulated data

## Imports & Setup
This notebook imports all necessary libraries and modules using `from utils.imports import *`, which centralizes all dependencies required for deployment simulation. See `utils/imports.py` for full details.

In [1]:
# if using in Colab, run this first
# import sys
# sys.path.append('/content/sample_data/')

# Imports from utils/imports.py
from imports import pd, np, os, nltk, TfidfVectorizer, joblib, random

[nltk_data] Downloading package punkt to /root/nltk_data...
[nltk_data]   Unzipping tokenizers/punkt.zip.
[nltk_data] Downloading package stopwords to /root/nltk_data...
[nltk_data]   Unzipping corpora/stopwords.zip.


In [2]:
# Checking if the file is saved
print(os.path.exists('/content/drive/MyDrive/Colab Notebooks/Stacker6X_trained_model.pkl'))  # Will return True if the file is in the current directory


True


In [3]:
# Load the trained ensemble model
stacker6X_model_instance = joblib.load('/content/drive/MyDrive/Colab Notebooks/Stacker6X_trained_model.pkl')


In [4]:
# Checking if the file is saved
print(os.path.exists('/content/drive/MyDrive/Colab Notebooks/Stacker6X_trained_model.pkl'))  # Will return True if the file is in the current directory


True


In [6]:
# Load the fitted TF-IDF vectorizer. It was saved before data was split
# loaded_tfidf_vectorizer can now be used to transform new data

loaded_tfidf_vectorizer = joblib.load('/content/drive/MyDrive/Colab Notebooks/tfidf_vectorizer.pkl')


In [7]:
# Checking if the file is saved
print(os.path.exists('/content/drive/MyDrive/Colab Notebooks/tfidf_vectorizer.pkl'))  # Will return True if the file is in the current directory

True


In [8]:
# --- 1. Simulate random samples per class ---
n_samples = 7752
samples_per_class = n_samples // 3

# SQL Injection payloads
sql_payloads = [
    "SELECT * FROM users WHERE id=1",
    "OR 1 =1 --",
    "UNION SELECT password FROM accounts",
    "' OR '1'='1",
    "'; DROP TABLE users; --"
]

# XSS payloads
xss_payloads = [
    "<script>alert('XSS')</script>",
    "<img src=x onerror=alert('XSS')>",
    "<svg onload=alert(1)>",
    "<body onload=alert('test')>",
    "<iframe src='javascript:alert(1)'></iframe>"
]

# Normal (safe) text
normal_payloads = [
    "Normal login attempt",
    "User submitted contact form",
    "Page loaded successfully",
    "Viewing profile page",
    "Search results for 'shoes'",
    "Login successful",
    "Order placed for 3 items",
    "Welcome back, Mark!",
    "Welcome back, John!",
    "User profile updated",
    "Settings saved successfully",
    "You have logged out",
    "Search: hiking backpacks",
    "Blog post: Best coding practices",
    "Comment added: Nice article!"
]

# Randomly sample and combine
raw_text_data = (
    random.choices(sql_payloads, k=samples_per_class) +
    random.choices(xss_payloads, k=samples_per_class) +
    random.choices(normal_payloads, k=samples_per_class)
)
labels = [0]*samples_per_class + [1]*samples_per_class + [2]*samples_per_class

# Shuffle both
combined = list(zip(raw_text_data, labels))
random.shuffle(combined)
raw_text_data, labels = zip(*combined)

In [9]:
# --- 2. Create a DataFrame ---
df9 = pd.DataFrame({'text': raw_text_data, 'True_Label': labels})

In [10]:
# --- 3. Fit or load the TF-IDF vectorizer ---
# Because loaded_tfidf_vectorizer is already fitted, fitting is skipped:
loaded_tfidf_vectorizer = joblib.load('/content/drive/MyDrive/Colab Notebooks/tfidf_vectorizer.pkl')
X_train_vectorized = loaded_tfidf_vectorizer.transform(df9['text'])


In [11]:
# --- 4. Using the loaded Stacker6X model instance to make predictions on the vectorized simulated data ---
y_pred_st = stacker6X_model_instance.predict(X_train_vectorized)

print(f"Shape of vectorized simulated data: {X_train_vectorized.shape}")

Shape of vectorized simulated data: (7752, 7752)


In [12]:
# --- 5. Creating df10 (can be same as df9), reset index ---
df10 = df9.copy().reset_index(drop=True)

In [13]:
# --- 6. Assign predictions to df10 ---
test_indices2 = df10.index  # [0, ..., 7751]
df10.loc[test_indices2, 'y_pred_st'] = y_pred_st[:len(df10)]

In [14]:
# --- 7. Optional: Mapping numeric predictions to class names ---
class_mapping = {0: "SQLInjection", 1: "XSS", 2: "Normal"}
df10['Predicted_Label'] = [class_mapping[pred] for pred in y_pred_st]

In [15]:
# --- 8. Print / analyzing ---
print(df10[['text', 'True_Label', 'y_pred_st', 'Predicted_Label']].head(10))

                               text  True_Label  y_pred_st Predicted_Label
0     <script>alert('XSS')</script>           1        1.0             XSS
1  <img src=x onerror=alert('XSS')>           1        1.0             XSS
2                  Login successful           2        0.0    SQLInjection
3           '; DROP TABLE users; --           0        0.0    SQLInjection
4             <svg onload=alert(1)>           1        1.0             XSS
5                       ' OR '1'='1           0        0.0    SQLInjection
6       <body onload=alert('test')>           1        1.0             XSS
7       <body onload=alert('test')>           1        1.0             XSS
8           '; DROP TABLE users; --           0        0.0    SQLInjection
9              User profile updated           2        0.0    SQLInjection
