In [1]:
import numpy as np
import tensorflow as tf
from tensorflow.keras.datasets import reuters  # type: ignore


In [2]:
(X_train, y_train), (X_test, y_test) = reuters.load_data(num_words=None, test_split=0.2)

In [3]:
X_test.shape

(2246,)

In [4]:
print(X_train[0])

[1, 27595, 28842, 8, 43, 10, 447, 5, 25, 207, 270, 5, 3095, 111, 16, 369, 186, 90, 67, 7, 89, 5, 19, 102, 6, 19, 124, 15, 90, 67, 84, 22, 482, 26, 7, 48, 4, 49, 8, 864, 39, 209, 154, 6, 151, 6, 83, 11, 15, 22, 155, 11, 15, 7, 48, 9, 4579, 1005, 504, 6, 258, 6, 272, 11, 15, 22, 134, 44, 11, 15, 16, 8, 197, 1245, 90, 67, 52, 29, 209, 30, 32, 132, 6, 109, 15, 17, 12]


In [5]:
word_index = reuters.get_word_index()
print(word_index['is'])
label_name = reuters.get_label_names()
print(label_name[5])

20
wheat


In [6]:

index_to_word = {}
for key, value in word_index.items():
    index_to_word[value] = key


print(" ".join([index_to_word[x] for x in X_train[0]]))

the wattie nondiscriminatory mln loss for plc said at only ended said commonwealth could 1 traders now april 0 a after said from 1985 and from foreign 000 april 0 prices its account year a but in this mln home an states earlier and rise and revs vs 000 its 16 vs 000 a but 3 psbr oils several and shareholders and dividend vs 000 its all 4 vs 000 1 mln agreed largely april 0 are 2 states will billion total and against 000 pct dlrs


In [7]:
from tensorflow.keras.preprocessing.text import Tokenizer 

max_words = 100

tokenizer = Tokenizer(num_words = 100)

In [8]:
X_train = tokenizer.sequences_to_matrix(X_train, mode='binary')
X_test = tokenizer.sequences_to_matrix(X_test, mode='binary')

In [9]:
X_test.shape

(2246, 100)

In [10]:
from tensorflow.keras.utils import to_categorical

num_classes = 46

y_train = to_categorical(y_train, num_classes)
y_test = to_categorical(y_test, num_classes)

In [11]:
y_train.shape

(8982, 46)

In [12]:
from tensorflow.keras.models import Sequential
from tensorflow.keras.layers import SimpleRNN, Dense, Dropout, Activation
from tensorflow.keras.optimizers import Adam
from tensorflow.keras.preprocessing.sequence import pad_sequences


X_train = pad_sequences(X_train, padding="post")
X_test = pad_sequences(X_test, padding="post")

# make the train data 3-dimensional 
X_train = np.array(X_train).reshape(X_train.shape[0], X_train.shape[1], 1)
X_test = np.array(X_test).reshape(X_test.shape[0], X_test.shape[1], 1)

def rnn():
    model = Sequential()
    model.add(SimpleRNN(50, input_shape = (max_words, 1), return_sequences = False))
    model.add(Dense(num_classes))
    model.add(Activation('softmax'))
    model.summary()

    adam = Adam(learning_rate = 0.001)
    model.compile(loss="categorical_crossentropy", optimizer = adam, metrics=['accuracy'])

    return model


X_train.ndim

3

In [13]:
from scikeras.wrappers import KerasClassifier

model = KerasClassifier(build_fn=rnn, epochs=1000, batch_size=50)
model.fit(X_train, y_train)

  X, y = self._initialize(X, y)
  super().__init__(**kwargs)


Epoch 1/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m3s[0m 4ms/step - accuracy: 0.2650 - loss: 2.9130
Epoch 2/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3740 - loss: 2.3710
Epoch 3/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.3952 - loss: 2.2996
Epoch 4/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4480 - loss: 2.1456
Epoch 5/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4656 - loss: 2.1196
Epoch 6/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4590 - loss: 2.1238
Epoch 7/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4575 - loss: 2.1234
Epoch 8/1000
[1m180/180[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 4ms/step - accuracy: 0.4653 - loss: 2.1134
Epoch 9/1000
[1m180/180

In [14]:
from sklearn.metrics import accuracy_score

y_pred = model.predict(X_test)
y_test_ = np.argmax(y_pred, axis = 1)

print(accuracy_score(y_test, y_test_))

[1m45/45[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 4ms/step


ValueError: Classification metrics can't handle a mix of multilabel-indicator and binary targets