In [1]:
import tensorflow as tf
from tensorflow.keras.datasets import reuters
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, GlobalAveragePooling1D
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.utils import to_categorical


In [2]:
# Load top 10,000 most frequent words
num_words = 10000
(x_train, y_train), (x_test, y_test) = reuters.load_data(num_words=num_words)

# Pad sequences to same length
max_len = 200
x_train = pad_sequences(x_train, maxlen=max_len)
x_test = pad_sequences(x_test, maxlen=max_len)

# One-hot encode labels
num_classes = max(y_train) + 1
y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz


In [3]:
model = Sequential([
    Embedding(input_dim=num_words, output_dim=128, input_length=max_len),
    GlobalAveragePooling1D(),
    Dense(128, activation='relu'),
    Dense(num_classes, activation='softmax')
])

model.compile(optimizer='adam', loss='categorical_crossentropy', metrics=['accuracy'])
model.summary()


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 embedding (Embedding)       (None, 200, 128)          1280000   
                                                                 
 global_average_pooling1d (G  (None, 128)              0         
 lobalAveragePooling1D)                                          
                                                                 
 dense (Dense)               (None, 128)               16512     
                                                                 
 dense_1 (Dense)             (None, 46)                5934      
                                                                 
Total params: 1,302,446
Trainable params: 1,302,446
Non-trainable params: 0
_________________________________________________________________


In [4]:
history = model.fit(x_train, y_train, epochs=5, batch_size=64, validation_split=0.2)


Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


In [12]:
model.save('news.h5')

In [14]:
from tensorflow.keras.models import load_model

# Load the saved model
loaded_model = load_model("news.h5")
print("Model loaded successfully!")


Model loaded successfully!


In [15]:
test_loss, test_acc = model.evaluate(x_test, y_test)
print(f"Test Accuracy: {test_acc*100:.2f}%")


Test Accuracy: 67.90%


In [18]:
import numpy as np

category_names = [
    'cpi', 'earn', 'acq', 'crude', 'trade', 'interest', 'money-fx', 'grain', 'ship',
    'cotton', 'wheat', 'corn', 'jobs', 'lei', 'strategic-metal', 'veg-oil', 'oilseed',
    'gold', 'silver', 'coffee', 'rubber', 'tin', 'palm-oil', 'sugar', 'reserves', 'shipments',
    'zinc', 'alum', 'cocoa', 'housing', 'platinum', 'barley', 'rapeseed', 'livestock', 
    'pet-chem', 'nickel', 'bop', 'sun-meal', 'iron-steel', 'hog', 'lei-cc', 'money-supply',
    'cotton-oil', 'cocoa-oil', 'sorghum', 'gas'
]

# --- Step 7: Randomized predictions with category names ---
indices = np.random.choice(len(x_test), 5, replace=False)  # pick 5 random test articles
for i in indices:
    sample = x_test[i].reshape(1, -1)
    prediction = model.predict(sample, verbose=0)
    predicted_class = prediction.argmax()
    true_class = y_test[i].argmax()
    
    print(f"\nPredicted category: {category_names[predicted_class]}")
    print(f"True category:      {category_names[true_class]}")


Predicted category: crude
True category:      crude

Predicted category: oilseed
True category:      oilseed

Predicted category: crude
True category:      crude

Predicted category: trade
True category:      trade

Predicted category: crude
True category:      crude
