In [7]:
import tensorflow as tf
from tensorflow.keras.layers import TextVectorization
import numpy as np

In [None]:
# ── Configuration: these values have to agree with the training run ──

# Length (in tokens) that the vectorizer pads / truncates every input to.
MAX_SEQUENCE_LEN = 200

# Label names; position i is looked up with the argmax index of the model output.
CLASS_NAMES = ["World", "Sports", "Business", "Sci/Tech"]

# Locations of the saved model and of the vocabulary file.
_DATA_DIR = "D:/AIML/data"
MODEL_PATH = _DATA_DIR + "/best_agnews_bilstm.h5"
VOCAB_PATH = _DATA_DIR + "/agnews_vocab.txt"

In [9]:
# 1) Restore the trained classifier stored at MODEL_PATH, then print its
#    layer-by-layer summary so the architecture can be checked at a glance.
model = tf.keras.models.load_model(filepath=MODEL_PATH)
model.summary()

Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          2560000   
                                                                 
 bidirectional (Bidirection  (None, 128)               98816     
 al)                                                             
                                                                 
 dropout (Dropout)           (None, 128)               0         
                                                                 
 dense (Dense)               (None, 64)                8256      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 4)                 260       
                                                        

In [10]:
# 2) Rebuild the text -> integer-id layer used in front of the model.
#    NOTE(review): any non-default standardize / split / ngrams settings used
#    during training must be repeated here as well — confirm against training.
vectorizer = TextVectorization(
    output_sequence_length=MAX_SEQUENCE_LEN,  # fixed-length output per sample
    output_mode="int",                        # one integer id per token
)

In [11]:
# 3) Load and set the saved vocabulary
#
# A vocabulary dumped from TextVectorization.get_vocabulary() starts with the
# two reserved entries "" (padding/mask) and "[UNK]" (out-of-vocabulary).
# Skipping blank lines removes the "" entry but leaves "[UNK]" in first place;
# Keras then no longer recognises the reserved prefix, treats "[UNK]" as a
# normal word and prepends its own reserved entries, which shifts every token
# id by one compared with training and makes the model's predictions garbage.
# Passing only the real tokens lets the layer re-add the reserved entries at
# ids 0 and 1, so ids line up with training whether or not the file had them.
with open(VOCAB_PATH, encoding="utf8") as f:
    vocab = [token for token in (line.strip() for line in f) if token]

# Drop the reserved OOV marker if it leads the file (no-op otherwise).
if vocab and vocab[0] == "[UNK]":
    vocab = vocab[1:]

vectorizer.set_vocabulary(vocab)

In [12]:
# 4) Raw string -> padded integer sequence
def encode(text: str) -> tf.Tensor:
    """Vectorize a single string.

    The vectorizer works on batches, so the text is wrapped in a
    one-element batch first.

    Returns:
        The vectorizer output for that batch, shape (1, MAX_SEQUENCE_LEN).
    """
    single_item_batch = tf.constant([text])
    return vectorizer(single_item_batch)

In [13]:
# 5) Classify one piece of text
def predict(text: str):
    """Run the loaded model on ``text`` and summarise the outcome.

    Args:
        text: Raw input string to classify.

    Returns:
        dict with the keys
          - "text": the input, unchanged
          - "predicted_class": entry of CLASS_NAMES at the highest-scoring index
          - "confidence": score of that class as a Python float
          - "all_probs": the model's full output row for this input
    """
    # The model returns one row per batch item; the batch holds a single text.
    class_probs = model.predict(encode(text))[0]
    best = int(np.argmax(class_probs))
    return dict(
        text=text,
        predicted_class=CLASS_NAMES[best],
        confidence=float(class_probs[best]),
        all_probs=class_probs,
    )

In [14]:
# 6) Demo on a few examples
examples = [
    "NASA launches new rover to explore the surface of Mars.",
    "Manchester United secure a thrilling victory in the Premier League.",
    "Apple unveils its latest MacBook Pro with M-series chip.",
    "Studies reveal rising global temperatures could affect crop yields."
]

for ex in examples:
    result = predict(ex)
    # Show at most 80 characters, and add the ellipsis only when the text was
    # actually cut (previously "…" was appended to every line, even short ones).
    shown = result["text"]
    if len(shown) > 80:
        shown = shown[:80] + "…"
    print(f"Text: {shown}")
    print(f"  Predicted: {result['predicted_class']}  "
          f"(confidence {result['confidence']:.1%})\n")

Text: NASA launches new rover to explore the surface of Mars.…
  Predicted: Business  (confidence 50.2%)

Text: Manchester United secure a thrilling victory in the Premier League.…
  Predicted: Business  (confidence 68.8%)

Text: Apple unveils its latest MacBook Pro with M-series chip.…
  Predicted: World  (confidence 71.5%)

Text: Studies reveal rising global temperatures could affect crop yields.…
  Predicted: Business  (confidence 88.8%)

