In [1]:
# Run once
!pip install -q sentence-transformers tensorflow scikit-learn shap


In [2]:
from google.colab import drive
drive.mount('/content/drive')

Mounted at /content/drive


Imports & config

In [3]:
import os, pickle, json, math
import numpy as np
import pandas as pd
from pathlib import Path
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score, f1_score, roc_auc_score, classification_report, confusion_matrix
from sentence_transformers import SentenceTransformer
import tensorflow as tf
from tensorflow.keras import layers, models, Model, Input
from tensorflow.keras.callbacks import EarlyStopping, ModelCheckpoint
from joblib import dump, load

# Paths (update if you saved elsewhere)
SAVE_DIR = "/content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts"
manifest_path = os.path.join(SAVE_DIR, "final_feature_manifest.pkl")

assert os.path.exists(SAVE_DIR), "feature_eng_artifacts not found - update SAVE_DIR"
assert os.path.exists(manifest_path), "final_feature_manifest.pkl missing - run feature eng first"


In [4]:
GRU_SAVE_DIR = "/content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/GRUmodel"
os.makedirs(GRU_SAVE_DIR, exist_ok=True)
print("Saving all GRU artifacts to:", GRU_SAVE_DIR)


Saving all GRU artifacts to: /content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/GRUmodel


Load produced artifacts (X_seq, W_nmf, windows meta, HMM posteriors, SBERT model name)

In [5]:
with open(manifest_path, 'rb') as f:
    manifest = pickle.load(f)

print("Manifest keys:", manifest.keys())
X_seq = np.load(manifest['X_seq_path'])
W_nmf = np.load(manifest['W_nmf_path'])
window_emb = np.load(manifest['window_emb_path'])
hmm_clusters = np.load(manifest['hmm_clusters_path'])
windows_meta = pd.read_csv(manifest['windows_meta_csv'])

print("X_seq shape:", X_seq.shape)
print("W_nmf shape:", W_nmf.shape)
print("windows_meta shape:", windows_meta.shape)
SBERT_MODEL_NAME = manifest.get('sbert_model_name', 'all-MiniLM-L6-v2')
print("SBERT model:", SBERT_MODEL_NAME)
embedder = SentenceTransformer(SBERT_MODEL_NAME)  # used later for KB embeddings / similarity


Manifest keys: dict_keys(['X_seq_path', 'W_nmf_path', 'window_emb_path', 'hmm_clusters_path', 'windows_meta_csv', 'nmf_model', 'hmm_model', 'sbert_model_name'])
X_seq shape: (1991, 10, 384)
W_nmf shape: (1991, 12)
windows_meta shape: (1991, 5)
SBERT model: all-MiniLM-L6-v2


The secret `HF_TOKEN` does not exist in your Colab secrets.
To authenticate with the Hugging Face Hub, create a token in your settings tab (https://huggingface.co/settings/tokens), set it as secret in your Google Colab and restart your session.
You will be able to reuse this secret in all of your notebooks.
Please note that authentication is recommended but still optional to access public models or datasets.


modules.json:   0%|          | 0.00/349 [00:00<?, ?B/s]

config_sentence_transformers.json:   0%|          | 0.00/116 [00:00<?, ?B/s]

README.md: 0.00B [00:00, ?B/s]

sentence_bert_config.json:   0%|          | 0.00/53.0 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/612 [00:00<?, ?B/s]

model.safetensors:   0%|          | 0.00/90.9M [00:00<?, ?B/s]

tokenizer_config.json:   0%|          | 0.00/350 [00:00<?, ?B/s]

vocab.txt: 0.00B [00:00, ?B/s]

tokenizer.json: 0.00B [00:00, ?B/s]

special_tokens_map.json:   0%|          | 0.00/112 [00:00<?, ?B/s]

config.json:   0%|          | 0.00/190 [00:00<?, ?B/s]

Prepare labels (y) and optional auxiliary features

In [6]:
# labels in windows_meta (binary_label) or label column
if 'binary_label' in windows_meta.columns:
    y = windows_meta['binary_label'].values
elif 'label' in windows_meta.columns:
    # map textual labels if needed
    labs = windows_meta['label'].astype(str).str.lower().str.strip()
    y = (labs != 'normal').astype(int).values
else:
    raise RuntimeError("No window-level label found in windows_meta.")

# build auxiliary features
# 1) normalized W_nmf (optional scaling)
from sklearn.preprocessing import StandardScaler, OneHotEncoder
scaler_aux = StandardScaler().fit(W_nmf)
W_nmf_std = scaler_aux.transform(W_nmf)

# 2) HMM cluster one-hot
hmm_cluster_vec = hmm_clusters.reshape(-1,1)
ohe = OneHotEncoder(sparse_output=False, handle_unknown="ignore").fit(hmm_cluster_vec)
hmm_ohe = ohe.transform(hmm_cluster_vec)

# 3) combine aux
aux = np.hstack([W_nmf_std, hmm_ohe])   # shape (n_windows, N_NMF + n_hmm_states)

print("y shape:", y.shape, "aux shape:", aux.shape)

y shape: (1991,) aux shape: (1991, 20)


Train/test split (stratified)

In [7]:
X_train, X_test, aux_train, aux_test, y_train, y_test = train_test_split(
    X_seq, aux, y,
    test_size=0.25, stratify=y, random_state=42
)
print("Train:", X_train.shape, aux_train.shape, y_train.shape)
print("Test: ", X_test.shape, aux_test.shape, y_test.shape)


Train: (1493, 10, 384) (1493, 20) (1493,)
Test:  (498, 10, 384) (498, 20) (498,)


Build GRU model (sequence + auxiliary inputs)

In [8]:
SEQ_LEN, EMB_DIM = X_train.shape[1], X_train.shape[2]
AUX_DIM = aux_train.shape[1]

tf.keras.backend.clear_session()
seq_input = Input(shape=(SEQ_LEN, EMB_DIM), name="seq_input")
aux_input = Input(shape=(AUX_DIM,), name="aux_input")

x = layers.Masking()(seq_input)
x = layers.GRU(128, dropout=0.2, recurrent_dropout=0.1)(x)
x = layers.Dense(64, activation='relu')(x)
x = layers.Dropout(0.3)(x)

a = layers.BatchNormalization()(aux_input)
a = layers.Dense(64, activation='relu')(a)

merged = layers.Concatenate()([x, a])
merged = layers.Dense(64, activation='relu')(merged)
out = layers.Dense(1, activation='sigmoid', name="out")(merged)

model = Model([seq_input, aux_input], out)
model.compile(optimizer='adam', loss='binary_crossentropy', metrics=['accuracy'])
model.summary()


In [12]:
import tensorflow as tf
from tensorflow.keras import layers, Model, Input
from tensorflow.keras.optimizers import Adam

def build_model(seq_len, emb_dim, aux_dim,
                rnn_units_1=128, rnn_units_2=64,
                dropout_rate=0.3, dense_units=64, lr=1e-3):
    # sequence input (SBERT embeddings)
    seq_input = Input(shape=(seq_len, emb_dim), name="seq_input")
    # stacked bidirectional GRU
    x = layers.Bidirectional(layers.GRU(rnn_units_1, return_sequences=True))(seq_input)
    x = layers.Dropout(dropout_rate)(x)
    x = layers.Bidirectional(layers.GRU(rnn_units_2, return_sequences=False))(x)
    x = layers.Dropout(dropout_rate)(x)

    # attention (simple additive attention)
    # Compute attention scores between the last GRU output and sequence outputs
    # (optional — a compact attention)
    attn_query = layers.Dense(rnn_units_2 * 2)(x)            # (batch, units)
    attn_scores = layers.Dot(axes=[1, 2])([attn_query[:, None, :], x[:, :, :]])  # shape handling
    # simpler: use self-attention layer from keras
    try:
        attn = layers.Attention()([x[:, None, :], x])  # uses (query, value)
        attn = tf.squeeze(attn, axis=1)
        x = layers.Concatenate()([x, attn])
    except Exception:
        # fallback: skip attention if above fails on your TF version
        pass

    # auxiliary input
    aux_input = Input(shape=(aux_dim,), name="aux_input")
    a = layers.BatchNormalization()(aux_input)
    a = layers.Dense(dense_units//2, activation="relu")(a)
    a = layers.Dropout(dropout_rate)(a)

    # combine
    z = layers.Concatenate()([x, a])
    z = layers.Dense(dense_units, activation="relu")(z)
    z = layers.Dropout(dropout_rate)(z)
    out = layers.Dense(1, activation="sigmoid", name="out")(z)

    model = Model(inputs=[seq_input, aux_input], outputs=out)

    # use Adam with a small weight decay (AdamW would be better; we emulate with kernel_regularizer below if needed)
    opt = Adam(learning_rate=lr)
    model.compile(optimizer=opt, loss="binary_crossentropy", metrics=["accuracy"])
    return model


Train GRU with callbacks

In [11]:
print("X_seq:", X_seq.shape)
print("W_nmf:", W_nmf.shape)
print("hmm_clusters:", hmm_clusters.shape)
print("aux:", aux.shape)
print("y:", y.shape)


X_seq: (1991, 10, 384)
W_nmf: (1991, 12)
hmm_clusters: (1991,)
aux: (1991, 20)
y: (1991,)


In [10]:
save_model_path = "/content/drive/MyDrive/LLM4Sec/models/gru_anom_model.keras"
ckpt = ModelCheckpoint(save_model_path, monitor='val_loss', save_best_only=True, verbose=1)
es = EarlyStopping(monitor='val_loss', patience=4, restore_best_weights=True)

history = model.fit(
    {"seq_input": X_train, "aux_input": aux_train},
    y_train,
    validation_split=0.1,
    epochs=30,
    batch_size=32,
    callbacks=[ckpt, es]
)


Epoch 1/30
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 47ms/step - accuracy: 0.5893 - loss: 0.6668
Epoch 1: val_loss improved from inf to 0.65322, saving model to /content/drive/MyDrive/LLM4Sec/models/gru_anom_model.keras
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 54ms/step - accuracy: 0.5895 - loss: 0.6667 - val_accuracy: 0.6133 - val_loss: 0.6532
Epoch 2/30
[1m41/42[0m [32m━━━━━━━━━━━━━━━━━━━[0m[37m━[0m [1m0s[0m 47ms/step - accuracy: 0.6329 - loss: 0.6476
Epoch 2: val_loss did not improve from 0.65322
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 49ms/step - accuracy: 0.6319 - loss: 0.6482 - val_accuracy: 0.6333 - val_loss: 0.6698
Epoch 3/30
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 33ms/step - accuracy: 0.6184 - loss: 0.6483
Epoch 3: val_loss did not improve from 0.65322
[1m42/42[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 35ms/step - accuracy: 0.6182 - loss: 0.6484 - val_accuracy: 0.

Evaluate model & basic metrics

In [None]:
# load best saved model
model = tf.keras.models.load_model(save_model_path)
y_prob = model.predict({"seq_input": X_test, "aux_input": aux_test}).ravel()
y_pred = (y_prob > 0.5).astype(int)

print("Accuracy:", accuracy_score(y_test, y_pred))
print("F1:", f1_score(y_test, y_pred))
print("AUC:", roc_auc_score(y_test, y_prob))
print("\nClassification report:\n", classification_report(y_test, y_pred, digits=4))
cm = confusion_matrix(y_test, y_pred)
print("Confusion matrix:\n", cm)


[1m16/16[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 35ms/step
Accuracy: 0.570281124497992
F1: 0.5977443609022557
AUC: 0.591684005096692

Classification report:
               precision    recall  f1-score   support

           0     0.5814    0.5020    0.5388       249
           1     0.5618    0.6386    0.5977       249

    accuracy                         0.5703       498
   macro avg     0.5716    0.5703    0.5683       498
weighted avg     0.5716    0.5703    0.5683       498

Confusion matrix:
 [[125 124]
 [ 90 159]]


In [None]:
print(windows_meta['binary_label'].value_counts())


binary_label
0    996
1    995
Name: count, dtype: int64


Threat mapping to Knowledge Bases (ATT&CK/CAPEC/CWE)

This cell expects optional CSVs with columns e.g. id, name, description. If you don't have them, skip (the code safely handles missing files).

In [None]:
import os
import xml.etree.ElementTree as ET
import pandas as pd

KB_DIR = "/content/drive/MyDrive/LLM4Sec/KB"

# ----------------------------
# 1. Load MITRE ATT&CK (CSV)
# ----------------------------
mitre_path = os.path.join(KB_DIR, "mitre_attack.csv")   # FIXED NAME
mitre_df = None

if os.path.exists(mitre_path):
    mitre_df = pd.read_csv(mitre_path, low_memory=False)
    if 'cleaned_tokens' in mitre_df.columns:
        mitre_df['kb_text'] = mitre_df['cleaned_tokens'].astype(str)
    else:
        mitre_df['kb_text'] = mitre_df['tokens'].astype(str)
    print("Loaded MITRE:", len(mitre_df))
else:
    print("MITRE file not found:", mitre_path)

# ----------------------------
# 2. Load CAPEC (CSV)
# ----------------------------
capec_path = os.path.join(KB_DIR, "capec.csv")
capec_df = None

if os.path.exists(capec_path):
    capec_df = pd.read_csv(capec_path, low_memory=False, encoding="ISO-8859-1")
    if 'cleaned_tokens' in capec_df.columns:
        capec_df['kb_text'] = capec_df['cleaned_tokens'].astype(str)
    else:
        capec_df['kb_text'] = capec_df['tokens'].astype(str)
    print("Loaded CAPEC:", len(capec_df))
else:
    print("CAPEC file not found:", capec_path)

# ----------------------------
# 3. Load CWE from XML
# ----------------------------
cwe_path = os.path.join(KB_DIR, "cwe.xml")   # FIXED NAME
cwe_df = None                                 # IMPORTANT: define first
cwe_items = []

if os.path.exists(cwe_path):
    tree = ET.parse(cwe_path)
    root = tree.getroot()
    ns = {'cwe': "http://cwe.mitre.org/cwe-7"}

    for weakness in root.findall(".//cwe:Weaknesses/cwe:Weakness", ns):
        wid = weakness.get("ID")
        name = weakness.get("Name")
        desc_node = weakness.find("cwe:Description", ns)
        desc = desc_node.text if desc_node is not None else ""
        cwe_items.append({"cwe_id": wid, "name": name, "kb_text": desc})

    cwe_df = pd.DataFrame(cwe_items)
    print("Loaded CWE:", len(cwe_df))
else:
    print("CWE XML not found:", cwe_path)

# ----------------------------
# Combine KBs
# ----------------------------

kb_sources = {}

if mitre_df is not None:
    kb_sources["mitre"] = {
        "df": mitre_df,
        "texts": mitre_df['kb_text'].tolist(),
        "id_col": "technique_id",
        "label_col": "technique"
    }

if capec_df is not None:
    kb_sources["capec"] = {
        "df": capec_df,
        "texts": capec_df['kb_text'].tolist(),
        "id_col": capec_df.columns[0],
        "label_col": "Description"
    }

if cwe_df is not None:
    kb_sources["cwe"] = {
        "df": cwe_df,
        "texts": cwe_df['kb_text'].tolist(),
        "id_col": "cwe_id",
        "label_col": "name"
    }

print("KB sources loaded:", kb_sources.keys())


Loaded MITRE: 623
Loaded CAPEC: 557
Loaded CWE: 968
KB sources loaded: dict_keys(['mitre', 'capec', 'cwe'])


Embed KB tokens using SBERT

In [None]:
kb_embeds = {}

for name, data in kb_sources.items():
    cache_path = os.path.join(KB_DIR, f"{name}_embeddings.npy")

    if os.path.exists(cache_path):
        kb_emb = np.load(cache_path)
        print(f"Loaded cached embeddings for {name}")
    else:
        kb_emb = embedder.encode(data['texts'], show_progress_bar=True)
        np.save(cache_path, kb_emb)
        print(f"Computed and cached embeddings for {name}")

    kb_embeds[name] = kb_emb


Loaded cached embeddings for mitre
Loaded cached embeddings for capec
Loaded cached embeddings for cwe


Compute similarity of each window to KB entries

In [None]:
from sklearn.metrics.pairwise import cosine_similarity

top_k = 3
kb_matches = {name: [] for name in kb_sources}

for i, we in enumerate(window_emb):
    for name, data in kb_sources.items():
        emb = kb_embeds[name]
        sims = cosine_similarity(we.reshape(1, -1), emb).ravel()
        top_idx = sims.argsort()[::-1][:top_k]

        top_entries = []
        df_kb = data['df']

        for idx in top_idx:
            kb_id = df_kb[data['id_col']].iloc[idx]
            kb_label = df_kb[data['label_col']].iloc[idx]

            top_entries.append(f"{kb_id} | {kb_label} | score={sims[idx]:.3f}")

        kb_matches[name].append("; ".join(top_entries))

# attach matches to windows_meta
for name in kb_sources:
    windows_meta[f"top_{name}"] = kb_matches[name]

windows_meta.head()


Unnamed: 0,window_text,start_idx,end_idx,binary_label,hmm_cluster,top_mitre,top_capec,top_cwe
0,get path httppath | url | mozillapath (linux; ...,0,10,1,2,attack-pattern--731f4f55-b6d0-41d1-a7a9-072a66...,"331 | An adversary, through a malicious web pa...",598 | Use of GET Request Method With Sensitive...
1,get path httppath | url | mozillapath (windows...,1,11,0,2,attack-pattern--a127c32c-cbb0-4f9d-be07-881a79...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...
2,get path httppath | url | mozillapath (windows...,2,12,0,2,attack-pattern--731f4f55-b6d0-41d1-a7a9-072a66...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...
3,get path httppath | url | mozillapath (linux; ...,3,13,1,2,attack-pattern--731f4f55-b6d0-41d1-a7a9-072a66...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...
4,post path httppath | url | mozillapath (window...,4,14,1,4,attack-pattern--731f4f55-b6d0-41d1-a7a9-072a66...,141 | An attack of this type exploits the abil...,601 | URL Redirection to Untrusted Site ('Open...


Explainability table (dominant NMF components + KB matching + predictions)

In [None]:
# =====================================================
# Compute predictions for ALL windows (not just test set)
# =====================================================

print("Computing predictions for ALL windows...")

all_prob = model.predict({"seq_input": X_seq, "aux_input": aux}).ravel()
all_pred = (all_prob > 0.5).astype(int)

print("all_prob shape:", all_prob.shape)
print("all_pred distribution:", pd.Series(all_pred).value_counts())


Computing predictions for ALL windows...
[1m63/63[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 9ms/step
all_prob shape: (1991,)
all_pred distribution: 1    1076
0     915
Name: count, dtype: int64


In [None]:
from sklearn.model_selection import train_test_split

_, test_idx = train_test_split(
    np.arange(X_seq.shape[0]),
    test_size=0.25,
    stratify=y,
    random_state=42
)

rows = []
for i in test_idx:
    nmf_row = W_nmf[i]
    top_idx = np.argsort(nmf_row)[::-1][:3]

    row = {
        "window_id": i,
        "start_idx": int(windows_meta['start_idx'].iloc[i]),
        "label_true": int(windows_meta['binary_label'].iloc[i]),
        "pred_prob": float(all_prob[i]),
        "pred_label": int(all_pred[i]),
        "top_nmf": "; ".join([f"comp{t}:{nmf_row[t]:.3f}" for t in top_idx])
    }

    # KB matches
    for name in kb_sources:
        col = f"top_{name}"
        row[col] = windows_meta[col].iloc[i]

    rows.append(row)

explain_df = pd.DataFrame(rows)
explain_path = os.path.join(SAVE_DIR, "gru_predictions_explain.csv")
explain_df.to_csv(explain_path, index=False)

print("Saved:", explain_path)
explain_df.head()


Saved: /content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/gru_predictions_explain.csv


Unnamed: 0,window_id,start_idx,label_true,pred_prob,pred_label,top_nmf,top_mitre,top_capec,top_cwe
0,1768,1768,0,0.426997,0,comp11:0.295; comp6:0.136; comp7:0.100,attack-pattern--a127c32c-cbb0-4f9d-be07-881a79...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...
1,162,162,1,0.479161,0,comp10:0.372; comp11:0.295; comp6:0.165,attack-pattern--a127c32c-cbb0-4f9d-be07-881a79...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...
2,1737,1737,0,0.593558,1,comp11:0.524; comp10:0.331; comp9:0.272,attack-pattern--a127c32c-cbb0-4f9d-be07-881a79...,"331 | An adversary, through a malicious web pa...",598 | Use of GET Request Method With Sensitive...
3,281,281,1,0.583157,1,comp10:0.290; comp11:0.240; comp6:0.222,attack-pattern--a127c32c-cbb0-4f9d-be07-881a79...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...
4,567,567,1,0.58651,1,comp11:0.360; comp9:0.239; comp10:0.215,attack-pattern--a127c32c-cbb0-4f9d-be07-881a79...,141 | An attack of this type exploits the abil...,598 | Use of GET Request Method With Sensitive...


Save final artifacts: model .keras, wrapper .pkl, tokenizer metadata

In [None]:
# 1) Ensure model saved path (from training)
keras_path = save_model_path
assert os.path.exists(keras_path), "Keras model .keras not found at " + keras_path

# 2) Save tokenizer metadata (we don't use tokenizer; save embedder info)
tokenizer_meta = {
    "type": "embedding",
    "embedder": SBERT_MODEL_NAME,
    "seq_len": SEQ_LEN,
    "emb_dim": EMB_DIM
}
with open(os.path.join(SAVE_DIR, "tokenizer_meta.pkl"), "wb") as f:
    pickle.dump(tokenizer_meta, f)
print("Saved tokenizer metadata")

# 3) Create wrapper class
class GRUWrapper:
    def __init__(self, model_path, scaler_aux_path=None, ohe_path=None, extra=None):
        self.model_path = model_path
        self._model = None
        self.extra = extra or {}
    def load(self):
        if self._model is None:
            self._model = tf.keras.models.load_model(self.model_path)
        return self._model
    def predict_proba(self, X_seq, aux):
        m = self.load()
        return m.predict({"seq_input": X_seq, "aux_input": aux}).ravel()
    def predict(self, X_seq, aux, thresh=0.5):
        return (self.predict_proba(X_seq, aux) > thresh).astype(int)

# 4) Save wrapper as .pkl (this is your requested .pkl)
wrapper = GRUWrapper(keras_path, extra={"scaler": "W_nmf scaler stored in feature artifacts"})
pkl_path = os.path.join(SAVE_DIR, "gru_model_wrapper.pkl")
with open(pkl_path, "wb") as f:
    pickle.dump(wrapper, f)
print("Saved pickled wrapper at:", pkl_path)


Saved tokenizer metadata
Saved pickled wrapper at: /content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/gru_model_wrapper.pkl


Quick demo: load wrapper and predict on first 5 test samples

In [None]:
with open(os.path.join(SAVE_DIR, "gru_model_wrapper.pkl"), "rb") as f:
    wrapper = pickle.load(f)

proba5 = wrapper.predict_proba(X_test[:5], aux_test[:5])
pred5  = wrapper.predict(X_test[:5], aux_test[:5])
print("proba:", proba5)
print("pred :", pred5)


[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 405ms/step
[1m1/1[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 38ms/step
proba: [0.42699686 0.47916108 0.593558   0.5831574  0.5865098 ]
pred : [0 0 1 1 1]


In [None]:
# =====================================================
# SAVE ALL GRU ARTIFACTS CLEANLY IN ONE FOLDER
# =====================================================

GRU_SAVE_DIR = "/content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/GRUmodel"
os.makedirs(GRU_SAVE_DIR, exist_ok=True)
print("Saving all GRU artifacts to:", GRU_SAVE_DIR)

# 1) Save GRU Keras model
keras_path = os.path.join(GRU_SAVE_DIR, "gru_model.keras")
model.save(keras_path)
print("Saved GRU Keras model at:", keras_path)

# 2) Save tokenizer metadata (SBERT + sequence config)
tokenizer_meta = {
    "type": "embedding",
    "embedder": SBERT_MODEL_NAME,
    "seq_len": SEQ_LEN,
    "emb_dim": EMB_DIM
}
with open(os.path.join(GRU_SAVE_DIR, "tokenizer_meta.pkl"), "wb") as f:
    pickle.dump(tokenizer_meta, f)
print("Saved tokenizer metadata")

# 3) Save StandardScaler for NMF features
with open(os.path.join(GRU_SAVE_DIR, "scaler_aux.pkl"), "wb") as f:
    pickle.dump(scaler_aux, f)
print("Saved auxiliary scaler")

# 4) Save OneHotEncoder for HMM clusters
with open(os.path.join(GRU_SAVE_DIR, "hmm_ohe.pkl"), "wb") as f:
    pickle.dump(ohe, f)
print("Saved HMM OHE encoder")

# 5) Save GRU wrapper class instance
wrapper = GRUWrapper(
    model_path=keras_path,
    extra={"info": "SBERT embeddings + NMF + HMM aux features"}
)

with open(os.path.join(GRU_SAVE_DIR, "gru_model_wrapper.pkl"), "wb") as f:
    pickle.dump(wrapper, f)

print("Saved GRU model wrapper")

print("\n🎉 All GRU artifacts have been successfully saved!")


Saving all GRU artifacts to: /content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/GRUmodel
Saved GRU Keras model at: /content/drive/MyDrive/LLM4Sec/Week3/feature_eng_artifacts/GRUmodel/gru_model.keras
Saved tokenizer metadata
Saved auxiliary scaler
Saved HMM OHE encoder
Saved GRU model wrapper

🎉 All GRU artifacts have been successfully saved!
