In [None]:
import os
import pandas as pd
import tensorflow as tf
from tensorflow.keras import layers, models, callbacks
from sklearn.model_selection import train_test_split

In [None]:
# Hyperparameters and shared settings used by every later cell.
MAX_VOCAB_SIZE    = 20000             # vocabulary cap handed to TextVectorization (max_tokens)
MAX_SEQUENCE_LEN  = 200               # every text is padded/truncated to this many token ids
EMBEDDING_DIM     = 128               # output width of the Embedding layer
RNN_UNITS         = 64                # units of the recurrent layer
BATCH_SIZE        = 64
EPOCHS            = 5                 # bumped up for meaningful training
AUTOTUNE          = tf.data.AUTOTUNE
# Position in this list == zero-based label id (see `predict`).
CLASS_NAMES       = ["World","Sports","Business","Sci/Tech"]
# Derived from CLASS_NAMES so the softmax width can never drift from the label list.
NUM_CLASSES       = len(CLASS_NAMES)

In [None]:
# Seed Python's `random`, NumPy and TensorFlow in one call. `tf.random.set_seed`
# alone leaves the first two unseeded, so runs are not fully repeatable.
tf.keras.utils.set_random_seed(42)

In [None]:
# Directory holding the AG News CSVs. Defaults to the original location; set the
# AG_NEWS_DATA_DIR environment variable to run the notebook on another machine.
DATA_DIR = os.environ.get("AG_NEWS_DATA_DIR", "C:/Users/skhan4/Documents/data")


def load_ag_news(csv_path):
    """Read one AG News CSV and return it with `label` and `text` columns.

    Args:
        csv_path: Path of a CSV whose first row is the header
            "Class Index", "Title", "Description".

    Returns:
        A DataFrame with the columns renamed to `label`/`title`/`description`,
        `label` shifted to start at 0, and `text` = title + " " + description.
    """
    df = pd.read_csv(csv_path, header=0).rename(
        columns={"Class Index":"label","Title":"title","Description":"description"}
    )
    # zero-based labels
    df["label"] = df["label"].astype(int) - 1
    # combine title + description; a missing field becomes "" instead of
    # turning the whole concatenation into NaN.
    df["text"] = df["title"].fillna("") + " " + df["description"].fillna("")
    return df


train_df = load_ag_news(f"{DATA_DIR}/ag_news_train.csv")
test_df  = load_ag_news(f"{DATA_DIR}/ag_news_test.csv")

# split out a validation set (20%, stratified so class proportions are kept)
train_texts, val_texts, train_labels, val_labels = train_test_split(
    train_df["text"].values,
    train_df["label"].values,
    test_size=0.2,
    random_state=42,
    stratify=train_df["label"].values
)
test_texts = test_df["text"].values
test_labels = test_df["label"].values


print("train_texts.shape:", train_texts.shape)
print("val_texts.shape:  ", val_texts.shape)
print("train_labels.shape:", train_labels.shape)
print("val_labels.shape:  ", val_labels.shape)
print("test_texts.shape: ", test_texts.shape)
print("test_labels.shape:", test_labels.shape)

"""
train_texts.shape: (3,)
val_texts.shape:   (2,)
train_labels.shape: (3,)
val_labels.shape:   (2,)

train_texts: [
    "The quick brown fox jumps over the lazy dog.",
    "TensorFlow makes data pipelines easy.",
    "I enjoy building neural networks."
]
val_texts: [
    "Keras simplifies model saving and loading.",
    "AI models require large datasets."
]
train_labels: [0, 1, 0]
val_labels:   [0, 1]
"""

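# Leftover output, presumably from a run on the full AG News data
# (the values in the string above are only a toy illustration):
# val_labels.shape:   (24000,)
# test_texts.shape:  (7600,)
# test_labels.shape: (7600,)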

In [None]:
"""
from tensorflow.keras.layers import TextVectorization
import tensorflow as tf

# 1) Toy sentences
sentences = [
    "The cat sat on the mat.",
    "The dog ate my homework.",
    "Deep learning is fun."
]

# 2) Create & adapt the TextVectorization layer
vectorizer = TextVectorization(
    max_tokens=None,                # None = no cap on the vocabulary size
    output_mode="int",            # map each token → an integer
    output_sequence_length=8      # pad/truncate every output to length 8
)
vectorizer.adapt(sentences)

# 3) Inspect the learned vocabulary
vocab = vectorizer.get_vocabulary()
print("vocabulary :", vocab)
# e.g. ['','[UNK]','the','cat','sat','on','mat','dog']

# 4) Vectorize a batch of sentences
batch = tf.constant(sentences)[:, tf.newaxis]   # shape (3,1)
token_ids = vectorizer(batch)
print("token IDs:\n", token_ids.numpy())



vocabulary : ['', '[UNK]', 'the', 'sat', 'on', 'my', 'mat', 'learning', 'is', 'homework', 'fun', 'dog', 'deep', 'cat', 'ate']
token IDs:
 [[ 2 13  3  4  2  6  0  0]
 [ 2 11 14  5  9  0  0  0]
 [12  7  8 10  0  0  0  0]]
"""

In [None]:
# Text -> fixed-length integer sequence encoder shared by every model below.
vectorizer = layers.TextVectorization(
    output_sequence_length=MAX_SEQUENCE_LEN,
    output_mode="int",
    max_tokens=MAX_VOCAB_SIZE,
)

# Learn the vocabulary from the training texts only: adapt() counts how often
# each token appears and keeps the top MAX_VOCAB_SIZE-2.
vectorizer.adapt(train_texts)



In [None]:
def vectorize_text(text, label):
    """Turn one (text, label) example into (token_ids, label).

    A trailing axis is added to `text` before it goes through `vectorizer`,
    and axis 0 of the result is squeezed away again. The label passes through
    unchanged.
    """
    # assumes `text` is a scalar string tensor, as produced by the unbatched
    # dataset in `make_dataset` — TODO confirm for other callers
    as_batch = tf.expand_dims(text, -1)
    token_ids = vectorizer(as_batch)
    return tf.squeeze(token_ids, axis=0), label



def make_dataset(texts, labels, shuffle=False):
    """Build a batched, prefetched `tf.data.Dataset` of (token_ids, label) pairs.

    Args:
        texts: Sequence of raw text strings.
        labels: Sequence of labels aligned with `texts`.
        shuffle: When True, shuffle the examples (buffer covering all of
            `texts`, fixed seed 42) before they are vectorized.

    Returns:
        A dataset that maps every example through `vectorize_text`, groups the
        results into batches of `BATCH_SIZE` and prefetches them.
    """
    ds = tf.data.Dataset.from_tensor_slices((texts, labels))
    if shuffle:
        # Buffer as large as the dataset, so the shuffle covers all examples.
        ds = ds.shuffle(buffer_size=len(texts), seed=42)
    return (
        ds.map(vectorize_text, num_parallel_calls=AUTOTUNE)
        .batch(BATCH_SIZE)
        .prefetch(AUTOTUNE)
    )

"""
After map:
[ ( [2,3,4,5,2,7], 0 ),
  ( [2,6,4,5,2,8], 1 ),
  ( [9,10,11,0,0,0], 0 ) ]

After batch (size=2):
(
  [[2,3,4,5,2,7],
   [2,6,4,5,2,8]],    # shape (2,6)
  [0,1]               # shape (2,)
), 
(
  [[9,10,11,0,0,0]],  # the last, smaller batch (kept by default; dropped if drop_remainder=True)
  [0]
)

With prefetch:
While your model is training on batch #1, batch #2 is already being prepared in the background
"""


# Only the training split is shuffled; validation and test keep their order.
train_ds = make_dataset(texts=train_texts, labels=train_labels, shuffle=True)
val_ds = make_dataset(texts=val_texts, labels=val_labels)
test_ds = make_dataset(texts=test_texts, labels=test_labels)

"""

sentences = [
    "the cat sat",
    "dog runs fast",
    "hello world",
    "tensorflow rocks",
    "deep learning",
    "vectorization layer"
]
labels = [0, 1, 0, 1, 0, 1]

MAX_VOCAB_SIZE   = 20
MAX_SEQUENCE_LEN = 4
BATCH_SIZE       = 2

Batch 1:
  x_batch shape: (2, 4)
  y_batch shape: (2,)
  x_batch values:
   [[ 2  3  4  0]    # e.g. ["the","cat","sat", <pad>]
    [ 5  6  7  0]]   # e.g. ["dog","runs","fast", <pad>]
  y_batch values: [0 1]

Batch 2:
  x_batch shape: (2, 4)
  y_batch shape: (2,)
  x_batch values:
   [[ 8  9  0  0]   # e.g. ["hello","world",<pad>,<pad>]
    [10 11  0  0]]  # e.g. ["tensorflow","rocks",<pad>,<pad>]
  y_batch values: [0 1]

Batch 3:
  x_batch shape: (2, 4)
  y_batch shape: (2,)
  x_batch values:
   [[12 13  0  0]   # e.g. ["deep","learning",<pad>,<pad>]
    [14 15  0  0]]  # e.g. ["vectorization","layer",<pad>,<pad>]
  y_batch values: [0 1]


"""



""" 
sentences = [
    "the cat sat",
    "dog runs fast",
    "hello world",
    "tensorflow rocks",
    "deep learning",
    "vectorization layer"
]
have exactly 14 distinct words

["the", "cat", "sat", "dog", "runs", "fast",
 "hello", "world", "tensorflow", "rocks",
 "deep", "learning", "vectorization", "layer"]


	dim_0	dim_1	dim_2
	0.496714153	-0.138264301	0.647688538
[UNK]	1.523029856	-0.234153375	-0.234136957
the	1.579212816	0.767434729	-0.469474386
cat	0.542560044	-0.463417693	-0.465729754
sat	0.241962272	-1.913280245	-1.724917833
dog	-0.562287529	-1.01283112	0.314247333
runs	-0.908024076	-1.412303701	1.465648769
fast	-0.2257763	0.067528205	-1.424748186
hello	-0.544382725	0.11092259	-1.150993577
world	0.375698018	-0.60063869	-0.29169375
tensorflow	-0.601706612	1.852278185	-0.013497225
rocks	-1.057710929	0.822544912	-1.22084365
deep	0.208863595	-1.959670124	-1.328186049
learning	0.196861236	0.73846658	0.171368281
vectorization	-0.115648282	-0.301103696	-1.47852199
layer	-0.719844208	-0.460638771	1.057122226

"""

In [None]:
# Folder that receives model checkpoints and saved models; created on demand.
ckpt_dir = "C:/Users/skhan4/Documents/data"
os.makedirs(name=ckpt_dir, exist_ok=True)
""" 
# Suppose our “embedded” input sequence is:
# shape = (1, 3, 2)
x = [[[0.5, -0.1],
      [1.0,  0.2],
      [-0.3, 0.8]]]
# A SimpleRNN with 4 units will:
# 1) Initialize h0 = [0,0,0,0]
# 2) Step through t=1..3 computing hidden states h1, h2, h3,
#    each a length-4 vector. Internally:
#      h_t = activation( x_t · W_x + h_{t-1} · W_h + b )
#
# The final output is h3 of shape (1, 4). For example:
h3 = [[ 0.12, -0.33, 0.47,  0.05 ]]  # ← shape (1, 4)

Input to simpleRNN:
[[[0.5, -0.1],
  [1.0,  0.2],
  [-0.3, 0.8]]]              # shape (1, 3, 2)

Output of simpleRNN:  
[[0.9306,−0.9163,0.9180,−0.8475]]  # shape (1, 4)

net₁ = x₁·Wₓ + h₀·Wₕ + b
     = [0.5, -0.1]·Wₓ + [0,0,0,0]·Wₕ + b
     ≈ [ 2.965025  ,  -1.1565593,   0.4401187,   0.9483985]
h₁ = tanh(net₁) ≈ [ 0.9946974, -0.8199154, 0.4137428, 0.73905715 ]

net₂ = x₂·Wₓ + h₁·Wₕ + b

t=1
h1 = tanh(net1)=[0.9947,−0.8199,0.4137,0.7391]  
t=2
h2 = tanh(net2)=[0.9841,−0.4951,0.9428,0.9799]
t=3
h3 = tanh(net3)=[0.9306,−0.9163,0.9180,−0.8475]
Final output
h3=[0.9306,−0.9163,0.9180,−0.8475]

"""


In [None]:
# Baseline classifier: token ids -> embeddings -> SimpleRNN -> dense softmax head.
model = models.Sequential([
    # Explicit input spec. This replaces Embedding's `input_length` argument,
    # which Keras 3 deprecates, and still lets model.summary() show real
    # shapes before training.
    layers.Input(shape=(MAX_SEQUENCE_LEN,), dtype="int64"),
    layers.Embedding(
        input_dim=MAX_VOCAB_SIZE,
        output_dim=EMBEDDING_DIM,
        mask_zero=True,   # token id 0 is padding; mask it for the RNN
    ),
    layers.SimpleRNN(RNN_UNITS),
    layers.Dropout(0.5),
    layers.Dense(64, activation="relu"),
    layers.Dropout(0.5),
    layers.Dense(NUM_CLASSES, activation="softmax"),
])

"""
# output of SimpleRNN for two samples
h = [[0.93, -0.92, 0.92, -0.85],
     [0.10,  0.20, 0.30,  0.40]]   # shape (2,4)
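Dropout(0.5)  # training only: zero ~half the units, scale survivors by 1/(1-0.5) = 2
h_drop = [[0.00, -1.84, 0.00, -1.70],
          [0.20,  0.00, 0.60,  0.00]]   # still (2,4); at inference Dropout is the identity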
Dense(3, softmax)
# [[0.10, 0.85, 0.05],
#  [0.33, 0.33, 0.34]]          

"""


In [None]:
model.compile(
    optimizer="adam",
    # Labels are integer class ids (not one-hot), hence the sparse loss.
    loss="sparse_categorical_crossentropy",
    metrics=["accuracy"],
)
model.summary()

# Write a checkpoint only when validation accuracy improves.
ckpt = callbacks.ModelCheckpoint(
    filepath=os.path.join(ckpt_dir, "agnews_simpleRNN_1.h5"),
    save_best_only=True,
    monitor="val_accuracy",
)
# Stop once val_accuracy has not improved for 2 epochs; restore_best_weights
# asks Keras to put the best weights back on the model.
es = callbacks.EarlyStopping(
    restore_best_weights=True,
    monitor="val_accuracy",
    patience=2,
)

In [None]:
# Train for the configured number of epochs. A hard-coded single epoch would
# bypass EPOCHS and leave EarlyStopping(patience=2) with nothing to do.
history = model.fit(
    train_ds,
    validation_data=val_ds,
    epochs=EPOCHS,
    callbacks=[ckpt, es],
)

# Final score on the held-out test split.
loss, acc = model.evaluate(test_ds)
print(f"SimpleRNN Test accuracy: {acc:.4f}")

# Persist the trained model in the native Keras format.
model.save(os.path.join(ckpt_dir, "agnews_simpleRNN_1.keras"))



In [None]:
# Checkpoint folder used by build_and_train; created if it does not exist yet.
ckpt_dir = "C:/Users/skhan4/Documents/data"
os.makedirs(name=ckpt_dir, exist_ok=True)

def build_and_train(model_name, recurrent_layer):
    """Build, train and evaluate one recurrent text classifier.

    The network is Embedding -> `recurrent_layer` -> Dropout -> Dense(64, relu)
    -> Dropout -> Dense(NUM_CLASSES, softmax). It is trained on `train_ds`,
    validated on `val_ds` and scored on `test_ds`; the test accuracy is printed.

    Args:
        model_name: Label used in the log lines and in the checkpoint file
            name (`agnews_<model_name>.h5` inside `ckpt_dir`).
        recurrent_layer: Keras layer instance placed right after the
            embedding (the notebook passes SimpleRNN / GRU / LSTM).

    Returns:
        The trained Keras model.
    """
    print(f"\n>>> Training {model_name}")
    model = models.Sequential([
        # Explicit input spec. This replaces Embedding's `input_length`
        # argument, which Keras 3 deprecates, and still lets model.summary()
        # show real shapes before training.
        layers.Input(shape=(MAX_SEQUENCE_LEN,), dtype="int64"),
        layers.Embedding(
            input_dim=MAX_VOCAB_SIZE,
            output_dim=EMBEDDING_DIM,
            mask_zero=True,   # token id 0 is padding; mask it for the RNN
        ),
        recurrent_layer,
        layers.Dropout(0.5),
        layers.Dense(64, activation="relu"),
        layers.Dropout(0.5),
        layers.Dense(NUM_CLASSES, activation="softmax"),
    ])
    model.compile(
        optimizer="adam",
        # Labels are integer class ids (not one-hot), hence the sparse loss.
        loss="sparse_categorical_crossentropy",
        metrics=["accuracy"],
    )
    model.summary()

    # Write a checkpoint only when validation accuracy improves.
    ckpt = callbacks.ModelCheckpoint(
        filepath=os.path.join(ckpt_dir, f"agnews_{model_name}.h5"),
        save_best_only=True,
        monitor="val_accuracy",
    )
    # Stop once val_accuracy has not improved for 2 epochs.
    es = callbacks.EarlyStopping(
        restore_best_weights=True,
        monitor="val_accuracy",
        patience=2,
    )

    model.fit(
        train_ds,
        validation_data=val_ds,
        epochs=EPOCHS,
        callbacks=[ckpt, es],
    )
    _, acc = model.evaluate(test_ds)
    print(f"{model_name} Test accuracy: {acc:.4f}")
    return model



In [None]:

# Train the three recurrent variants one after another with the same recipe.
rnn_model = build_and_train(
    model_name="simple_rnn",
    recurrent_layer=layers.SimpleRNN(RNN_UNITS),
)
gru_model = build_and_train(
    model_name="gru",
    recurrent_layer=layers.GRU(RNN_UNITS),
)
lstm_model = build_and_train(
    model_name="lstm",
    recurrent_layer=layers.LSTM(RNN_UNITS),
)


In [None]:









def predict(text, model):
    """Classify a single text with `model`.

    The text is wrapped in a one-element batch, run through `vectorizer` and
    then through `model.predict`.

    Returns:
        (class_name, probability) for the highest-scoring class, where
        class_name is looked up in CLASS_NAMES.
    """
    token_batch = vectorizer(tf.constant([text]))
    class_probs = model.predict(token_batch)[0]   # first (only) row of the batch
    best = int(tf.argmax(class_probs))
    return CLASS_NAMES[best], float(class_probs[best])

# Hand-written headlines for a quick sanity check of the trained LSTM.
examples = [
    "NASA launches new rover to explore Mars.",
    "Champions League final ends in dramatic upset.",
    "Federal Reserve hikes interest rates for third time.",
    "Breakthrough in AI promises better natural language understanding."
]

print("\nSample predictions with LSTM model:")
for t in examples:
    cls, conf = predict(t, lstm_model)
    # Show at most 60 characters, and add the ellipsis only when the text was
    # actually cut off.
    snippet = t if len(t) <= 60 else t[:60] + "…"
    print(f"{cls:<10} ({conf:.1%}): {snippet}")
 