6. Classifying Reuters dataset using deep neural network-multiclass classification problem.

In [7]:
# Step 1: Import the necessary libraries
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import reuters
from tensorflow.keras.preprocessing.sequence import pad_sequences
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import Dense, Embedding, LSTM, GlobalMaxPooling1D
from sklearn.metrics import accuracy_score, classification_report

In [3]:
# Step 2: Load the Reuters dataset
num_words = 10000 # Consider only the top 10,000 most frequent words
(X_train, y_train), (X_test, y_test) = reuters.load_data(num_words=num_words)
# Check the shape of the data
print(f"Training samples: {len(X_train)}, Test samples: {len(X_test)}")

Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters.npz
[1m2110848/2110848[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 5us/step
Training samples: 8982, Test samples: 2246


In [5]:
# Step 3: Preprocess the data
max_len = 200 # Maximum sequence length
X_train = pad_sequences(X_train, maxlen=max_len)
X_test = pad_sequences(X_test, maxlen=max_len)
# Now each review has length 200
print(X_train.shape)

(8982, 200)


In [6]:
# Step 4: Define the neural network architecture
model = Sequential([
    Embedding(input_dim=num_words, output_dim=128,input_length=max_len),
    LSTM(units=64),
    Dense(units=46, activation='softmax') # Output layer with 46 units for 46 classes
])




In [9]:
# Step 5: Compile the model
model.compile(optimizer='adam',
loss='sparse_categorical_crossentropy', metrics=['accuracy'])
# Print the model summary
model.summary()

In [11]:
# Step 6: Train the model
history = model.fit(X_train, y_train, epochs=10, batch_size=128,validation_split=0.1)


Epoch 1/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 198ms/step - accuracy: 0.6507 - loss: 1.4451 - val_accuracy: 0.6329 - val_loss: 1.5257
Epoch 2/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 213ms/step - accuracy: 0.6778 - loss: 1.2983 - val_accuracy: 0.6452 - val_loss: 1.4688
Epoch 3/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m13s[0m 209ms/step - accuracy: 0.7428 - loss: 1.0881 - val_accuracy: 0.6574 - val_loss: 1.4150
Epoch 4/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 214ms/step - accuracy: 0.7711 - loss: 0.9357 - val_accuracy: 0.6707 - val_loss: 1.4116
Epoch 5/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 229ms/step - accuracy: 0.7938 - loss: 0.8485 - val_accuracy: 0.6719 - val_loss: 1.3918
Epoch 6/10
[1m64/64[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 229ms/step - accuracy: 0.8380 - loss: 0.6755 - val_accuracy: 0.6852 - val_loss: 1.3999
Epoch 7/10
[1m64/64[

In [12]:
# Step 7: Evaluate the model
y_pred = np.argmax(model.predict(X_test), axis=1)
accuracy = accuracy_score(y_test, y_pred)
print("Test Accuracy:", accuracy)
print(classification_report(y_test, y_pred))

[1m71/71[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m2s[0m 26ms/step
Test Accuracy: 0.6763134461264471
              precision    recall  f1-score   support

           0       0.06      0.08      0.07        12
           1       0.67      0.54      0.60       105
           2       0.60      0.15      0.24        20
           3       0.94      0.89      0.91       813
           4       0.76      0.86      0.81       474
           5       0.00      0.00      0.00         5
           6       0.31      0.36      0.33        14
           7       0.00      0.00      0.00         3
           8       0.28      0.74      0.41        38
           9       0.36      0.60      0.45        25
          10       0.57      0.57      0.57        30
          11       0.41      0.45      0.43        83
          12       0.17      0.15      0.16        13
          13       0.27      0.16      0.20        37
          14       0.00      0.00      0.00         2
          15       0.00    

  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))
  _warn_prf(average, modifier, msg_start, len(result))


In [16]:
# Step 8: Select random samples from the test dataset
num_samples = 10
test_size = len(X_test)
random_indices = np.random.choice(test_size, num_samples,replace=False)

In [21]:
# Step 9: Define a function to decode the integer sequences back intowords
def decode_sequence(sequence):
 word_index = reuters.get_word_index()
 reverse_word_index = dict([(value, key) for (key, value) in word_index.items()])
 return ' '.join([reverse_word_index.get(i - 3, '?') for i in sequence])


In [22]:
# Step 10: Print sample classification output
print("Sample Classification Output:")
for i in range(num_samples):
 print(f"Sample {i+1}:")
 print("Text:", decode_sequence(X_test[random_indices[i]]))
 print("Actual Label:", y_test[random_indices[i]])
 print("Predicted Label:", y_pred[random_indices[i]])
 print() # Add empty line for separation

Sample Classification Output:
Sample 1:
Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters_word_index.json
[1m550378/550378[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 2us/step
Text: it has placed its approximately 540 mln dlrs of medium and long term loans to brazil on non accrual status and that first quarter net income will be reduced by about seven mln dlrs as a result brazil suspended interest payments on its 68 billion dlrs of medium and long term debt on february 22 u s banking regulations do not require banks to stop accruing interest on loans until payments are 90 days overdue but bankers trust said it acted now because of the high potential of a continued suspension that would result in reaching the 90 day limit in the second quarter of 1987 assuming no cash payments at current interest rates are received for the rest of 1987 bankers trust estimated that full year net income would be reduced by about 30 mln dlrs bankers trust 