In [1]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split

In [2]:
# ────────────────────────────────────────────────────────────────────────────────
# 0) Hyperparameters & Constants
# ────────────────────────────────────────────────────────────────────────────────
# -- text pipeline --
MAX_VOCAB_SIZE = 20_000      # vocabulary cap shared by TextVectorization and Embedding
MAX_SEQUENCE_LEN = 200       # every example is padded/truncated to this many tokens

# -- model --
EMBEDDING_DIM = 128
RNN_UNITS = 64

# -- training --
BATCH_SIZE = 64
EPOCHS = 1
AUTOTUNE = tf.data.AUTOTUNE

# -- labels --
# Position i in this list is the display name for zero-based label i.
CLASS_NAMES = ["World","Sports","Business","Sci/Tech"]
NUM_CLASSES = len(CLASS_NAMES)

In [3]:
# ────────────────────────────────────────────────────────────────────────────────
# 1) Load AG News into pandas
# ────────────────────────────────────────────────────────────────────────────────
def _load_ag_news(csv_path):
    """Read one headerless AG News CSV and prepare it for modelling.

    The returned frame has the three raw columns (label, title, description)
    with the label shifted to start at 0, plus a `text` column holding
    "<title> <description>".
    """
    frame = pd.read_csv(csv_path, header=None,
                        names=["label","title","description"])
    # zero-base labels: 1→0, 2→1, 3→2, 4→3
    frame["label"] = frame["label"] - 1
    # combine title + description
    frame["text"] = frame["title"].str.cat(frame["description"], sep=" ")
    return frame


train_df = _load_ag_news("D:/AIML/data/ag_news_train.csv")
test_df  = _load_ag_news("D:/AIML/data/ag_news_test.csv")

# Carve a stratified 20% validation split out of the training file; the test
# file is used as-is.
all_train_labels = train_df["label"].values
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_df["text"].values,
    all_train_labels,
    stratify=all_train_labels,
    test_size=0.2,
    random_state=42,
)
test_texts, test_labels = test_df["text"].values, test_df["label"].values

# Sanity check: one (texts, labels) shape pair per split, in train/val/test order.
for split_texts, split_labels in (
    (train_texts, train_labels),
    (val_texts, val_labels),
    (test_texts, test_labels),
):
    print(split_texts.shape, split_labels.shape)

(96000,) (96000,)
(24000,) (24000,)
(7600,) (7600,)


In [4]:
# ────────────────────────────────────────────────────────────────────────────────
# 2) TextVectorization
# ────────────────────────────────────────────────────────────────────────────────
# Integer tokenizer: emits fixed-length id sequences of MAX_SEQUENCE_LEN,
# drawing on at most MAX_VOCAB_SIZE distinct ids.
vectorizer = layers.TextVectorization(
    output_mode="int",
    max_tokens=MAX_VOCAB_SIZE,
    output_sequence_length=MAX_SEQUENCE_LEN,
)
# The vocabulary is fitted on the training split only.
vectorizer.adapt(train_texts)

def vectorize_text(text, label):
    """Tokenize one example and pass its label through unchanged.

    The input gets a trailing axis added before it is handed to the
    module-level `vectorizer`; the leading axis of the result is then
    squeezed away again. As used by `make_dataset` (mapped before batching),
    `text` is a single string element.

    Returns:
        (token_ids, label)
    """
    as_column = tf.expand_dims(text, axis=-1)
    ids = tf.squeeze(vectorizer(as_column), axis=0)
    return ids, label


In [5]:
# ────────────────────────────────────────────────────────────────────────────────
# 3) tf.data Datasets
# ────────────────────────────────────────────────────────────────────────────────
def make_dataset(texts, labels, shuffle=False):
    """Build a batched, prefetched tf.data pipeline of (token_ids, label) pairs.

    Args:
        texts: Raw strings, one per example.
        labels: Labels aligned with `texts`.
        shuffle: When true, shuffle the raw examples (buffer sized to the
            whole dataset, fixed seed 42) before tokenizing.

    Returns:
        A tf.data.Dataset yielding batches of BATCH_SIZE examples.
    """
    pairs = tf.data.Dataset.from_tensor_slices((texts, labels))
    source = pairs.shuffle(len(texts), seed=42) if shuffle else pairs
    tokenized = source.map(vectorize_text, num_parallel_calls=AUTOTUNE)
    batched = tokenized.batch(BATCH_SIZE)
    return batched.prefetch(AUTOTUNE)

# Only the training stream is shuffled; validation and test keep file order.
train_ds = make_dataset(texts=train_texts, labels=train_labels, shuffle=True)
val_ds   = make_dataset(texts=val_texts,   labels=val_labels,   shuffle=False)
test_ds  = make_dataset(texts=test_texts,  labels=test_labels,  shuffle=False)

In [6]:
# ────────────────────────────────────────────────────────────────────────────────
# 4) Build & train three variants: SimpleRNN, GRU, LSTM
# ────────────────────────────────────────────────────────────────────────────────
def build_and_train(model_name, recurrent_layer):
    """Assemble an embedding → recurrent → dense classifier, fit it, and report test accuracy.

    Args:
        model_name: Tag used in the log lines and in the checkpoint filename.
        recurrent_layer: An already-constructed Keras layer placed directly
            after the embedding (the notebook passes SimpleRNN / GRU / LSTM).

    Returns:
        The trained Keras model.

    Side effects:
        Prints the model summary and the test accuracy, and writes a
        checkpoint to D:/AIML/data/agnews_<model_name>.h5.
    """
    print(f"\n>>> Training {model_name} model")

    # Layers are created in stack order so Keras' auto-generated layer names
    # follow the same sequence as the architecture.
    stack = [
        layers.Embedding(
            input_dim=MAX_VOCAB_SIZE,
            output_dim=EMBEDDING_DIM,
            input_length=MAX_SEQUENCE_LEN,
            mask_zero=True,  # token id 0 is masked out for downstream layers
        ),
        recurrent_layer,
        layers.Dropout(0.5),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(NUM_CLASSES, activation="softmax"),
    ]
    model = models.Sequential(stack)

    # Labels are integer class ids, hence the sparse loss.
    model.compile(
        loss="sparse_categorical_crossentropy",
        optimizer="adam",
        metrics=["accuracy"],
    )
    model.summary()

    # NOTE(review): the checkpoint tracks val_accuracy while early stopping
    # tracks val_loss, so the file on disk and the returned weights may come
    # from different epochs — confirm this is intended.
    checkpoint_cb = callbacks.ModelCheckpoint(
        f"D:/AIML/data/agnews_{model_name}.h5",
        monitor="val_accuracy",
        save_best_only=True,
    )
    early_stop_cb = callbacks.EarlyStopping(
        monitor="val_loss",
        patience=2,
        restore_best_weights=True,
    )

    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        callbacks=[checkpoint_cb, early_stop_cb],
    )

    # Held-out evaluation; only the accuracy is reported.
    _test_loss, test_acc = model.evaluate(test_ds)
    print(f"{model_name} Test accuracy: {test_acc:.4f}")

    return model

In [7]:


# Each variant gets a freshly constructed recurrent layer of RNN_UNITS units;
# everything else in the architecture is shared via build_and_train.

# 4a) Simple RNN
rnn_model = build_and_train(
    model_name="simple_rnn",
    recurrent_layer=layers.SimpleRNN(RNN_UNITS),
)

# 4b) GRU
gru_model = build_and_train(
    model_name="gru",
    recurrent_layer=layers.GRU(RNN_UNITS),
)

# 4c) LSTM
lstm_model = build_and_train(
    model_name="lstm",
    recurrent_layer=layers.LSTM(RNN_UNITS),
)


>>> Training simple_rnn model
Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          2560000   
                                                                 
 simple_rnn (SimpleRNN)      (None, 64)                12352     
                                                                 
 dropout (Dropout)           (None, 64)                0         
                                                                 
 dense (Dense)               (None, 64)                4160      
                                                                 
 dropout_1 (Dropout)         (None, 64)                0         
                                                                 
 dense_1 (Dense)             (None, 4)                 260       
                                                                 
Total params: 2576772 (9.

  saving_api.save_model(


simple_rnn Test accuracy: 0.8892

>>> Training gru model
Model: "sequential_1"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding_1 (Embedding)     (None, 200, 128)          2560000   
                                                                 
 gru (GRU)                   (None, 64)                37248     
                                                                 
 dropout_2 (Dropout)         (None, 64)                0         
                                                                 
 dense_2 (Dense)             (None, 64)                4160      
                                                                 
 dropout_3 (Dropout)         (None, 64)                0         
                                                                 
 dense_3 (Dense)             (None, 4)                 260       
                                                               

In [8]:
# ────────────────────────────────────────────────────────────────────────────────
# 5) Demo predictions (for the last trained model, e.g. lstm_model)
# ────────────────────────────────────────────────────────────────────────────────
def predict(text, model):
    """Classify a single raw string with `model`.

    The string is wrapped into a one-element batch, tokenized with the
    module-level `vectorizer`, and scored by `model`.

    Returns:
        (class_name, probability) for the highest-scoring class.
    """
    batch = tf.constant([text])
    class_probs = model.predict(vectorizer(batch))[0]
    best = int(tf.argmax(class_probs))
    return CLASS_NAMES[best], float(class_probs[best])

# Four hand-written headlines to spot-check the trained model.
examples = [
    "NASA launches new rover to explore Mars.",
    "Champions League final ends in dramatic upset.",
    "Federal Reserve hikes interest rates for third time.",
    "Breakthrough in AI promises better natural language understanding."
]

print("\nSample predictions with LSTM model:")
for t in examples:
    cls, conf = predict(t, lstm_model)
    # Show at most 60 characters, and add the ellipsis only when the text was
    # actually cut — previously it was appended to every line, even short ones.
    preview = t if len(t) <= 60 else t[:60] + "…"
    print(f"{cls:<10} ({conf:.1%}): {preview}")


Sample predictions with LSTM model:
Sci/Tech   (97.3%): NASA launches new rover to explore Mars.…
Sports     (97.8%): Champions League final ends in dramatic upset.…
Business   (94.2%): Federal Reserve hikes interest rates for third time.…
Sci/Tech   (64.2%): Breakthrough in AI promises better natural language understa…
