In [None]:
# ────────────────────────────────────────────────────────────────────────────────
# Cell 1 — Imports & Configuration
# ────────────────────────────────────────────────────────────────────────────────
import tensorflow as tf
import numpy as np
from tensorflow.keras.layers import TextVectorization

# (Must match training)
MAX_SEQUENCE_LEN = 200
MODEL_PATH       = "D:/AIML/data/bilstm_tc_fun_glove.h5"
VOCAB_PATH       = "D:/AIML/data/agnews_vocab.txt"
CLASS_NAMES      = ["World", "Sports", "Business", "Sci/Tech"]

In [2]:
# ────────────────────────────────────────────────────────────────────────────────
# Cell 2 — Load Your Trained Model
# ────────────────────────────────────────────────────────────────────────────────
model = tf.keras.models.load_model(MODEL_PATH)
model.summary()

Model: "bilstm_glove"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_tokens (InputLayer)   [(None, 200)]             0         
                                                                 
 embedding_5 (Embedding)     (None, 200, 300)          6000000   
                                                                 
 bidirectional (Bidirection  (None, 200, 128)          186880    
 al)                                                             
                                                                 
 bidirectional_1 (Bidirecti  (None, 128)               98816     
 onal)                                                           
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 64)               

In [3]:
# ────────────────────────────────────────────────────────────────────────────────
# Cell 3 — Reconstruct the TextVectorization Layer
# ────────────────────────────────────────────────────────────────────────────────
# 3.1 Create the layer exactly as in training (no max_tokens argument needed)
vectorizer = TextVectorization(
    output_mode="int",
    output_sequence_length=MAX_SEQUENCE_LEN
)

# 3.2 Load and set the saved vocabulary
with open(VOCAB_PATH, encoding="utf8") as f:
    vocab = [line.strip() for line in f if line.strip()]
vectorizer.set_vocabulary(vocab)

# Quick vocab sanity check
print("Vocabulary size:", len(vocab))
print("First 10 tokens:", vocab[:10])

Vocabulary size: 19999
First 10 tokens: ['[UNK]', 'the', 'to', 'a', 'of', 'in', 'and', 'on', 'for', '39s']


In [4]:
# ────────────────────────────────────────────────────────────────────────────────
# Cell 4 — Helper to Encode Raw Text
# ────────────────────────────────────────────────────────────────────────────────
def encode(text: str) -> tf.Tensor:
    """
    Turn a single raw string into a tensor of shape (1, MAX_SEQUENCE_LEN)
    of integer token IDs.
    """
    return vectorizer(tf.constant([text]))

In [5]:
# ────────────────────────────────────────────────────────────────────────────────
# Cell 5 — Prediction Function
# ────────────────────────────────────────────────────────────────────────────────
def predict(text: str):
    """
    Runs the model on the input text, returning:
      - predicted class name
      - confidence of that prediction
      - full probability vector
    """
    seq   = encode(text)             # shape (1, MAX_SEQUENCE_LEN)
    probs = model.predict(seq)[0]    # shape (NUM_CLASSES,)
    idx   = int(np.argmax(probs))    # index of highest probability
    return {
        "text": text,
        "predicted_class": CLASS_NAMES[idx],
        "confidence": float(probs[idx]),
        "all_probs": probs
    }

In [6]:
# ────────────────────────────────────────────────────────────────────────────────
# Cell 6 — Demo on Example Sentences
# ────────────────────────────────────────────────────────────────────────────────
examples = [
    "NASA announces new rover mission to study lunar surface.",
    "Manchester United win the Champions League final.",
    "Wall Street rallies after strong tech earnings report.",
    "Researchers develop AI model that can learn from small data."
]

for ex in examples:
    result = predict(ex)
    print(f"Text: {result['text'][:80]}…")
    print(f"Predicted: {result['predicted_class']}  "
          f"(confidence {result['confidence']:.1%})\n")

Text: NASA announces new rover mission to study lunar surface.…
Predicted: Business  (confidence 81.2%)

Text: Manchester United win the Champions League final.…
Predicted: Sports  (confidence 43.7%)

Text: Wall Street rallies after strong tech earnings report.…
Predicted: Sci/Tech  (confidence 66.8%)

Text: Researchers develop AI model that can learn from small data.…
Predicted: Sports  (confidence 74.3%)

