In [18]:
import numpy as np
from tensorflow.keras import layers, models
from tensorflow.keras.datasets import reuters


In [4]:
# Load the Reuters dataset as (data, labels) pairs for the train and test splits.
# num_words=10_000 matches the default `dim` of `vectorize`; presumably it caps
# the word ids below that value -- confirm against the Keras docs.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(num_words=10_000)

In [7]:
# Inspect the shape of the training array (1-D: one entry per sample).
train_data.shape

(8982,)

In [10]:
# Peek at the first ten integer word ids of the first training sample.
train_data[0][:10]


[1, 2, 2, 8, 43, 10, 447, 5, 25, 207]

In [11]:
# Decode the first training sample back into words.
# `word_index` ends up as the id -> word lookup. The word -> id mapping is kept
# under its own name instead of rebinding one variable with two meanings.
word_to_id = reuters.get_word_index()
word_index = {word_id: word for word, word_id in word_to_id.items()}
# A named loop variable is used instead of `_`, because assigning `_` in a
# notebook overrides IPython's "last output" variable.
for word_id in train_data[0]:
  # The first 3 ids are reserved, so stored ids are shifted by 3 relative to
  # the word index; ids with no matching word print as None.
  print(word_index.get(word_id - 3))


Downloading data from https://storage.googleapis.com/tensorflow/tf-keras-datasets/reuters_word_index.json
None
None
None
said
as
a
result
of
its
december
acquisition
of
space
co
it
expects
earnings
per
share
in
1987
of
1
15
to
1
30
dlrs
per
share
up
from
70
cts
in
1986
the
company
said
pretax
net
should
rise
to
nine
to
10
mln
dlrs
from
six
mln
dlrs
in
1986
and
rental
operation
revenues
to
19
to
22
mln
dlrs
from
12
5
mln
dlrs
it
said
cash
flow
per
share
this
year
should
be
2
50
to
three
dlrs
reuter
3


In [12]:
# Integer label of the first training sample.
train_labels[0]


3

In [13]:
# The dataset stores each sample as a list of word ids; those lists must be
# turned into fixed-size tensors so the network can consume them.
def vectorize(sequences, dim=10000):
  """Multi-hot encode a collection of integer-index sequences.

  Args:
    sequences: Sized iterable of index sequences. Every index must be valid
      for an axis of length `dim`.
    dim: Width of each encoded row.

  Returns:
    A float array of shape (len(sequences), dim) where entry [i, j] is 1 if
    index j occurs in the i-th sequence and 0 otherwise.
  """
  encoded = np.zeros((len(sequences), dim))
  for row, indices in enumerate(sequences):
    # Fancy indexing sets every listed column of this row in one step;
    # repeated indices simply write the same 1 again.
    encoded[row, indices] = 1
  return encoded


In [14]:
# Multi-hot encode both splits with `vectorize` (default width of 10000 columns).
x_train, x_test = vectorize(train_data), vectorize(test_data)


In [17]:
from tensorflow.keras.utils import to_categorical


In [19]:
# One-hot encode the integer topic labels.
# The class count is passed explicitly so both matrices always have the same
# width as the 46-unit output layer; without it, to_categorical infers the
# width from each array's own maximum label, so a split that happens to lack
# the highest label would come out narrower.
num_classes = 46
y_train = to_categorical(train_labels, num_classes=num_classes)
y_test = to_categorical(test_labels, num_classes=num_classes)

In [20]:
# One-hot encoded label row of the first training sample.
y_train[0]

array([0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0.], dtype=float32)

In [21]:
# Two 64-unit ReLU hidden layers on top of the 10000-wide multi-hot input,
# followed by a 46-unit output layer (one unit per label class).
#
# The output layer uses softmax rather than sigmoid. Sigmoid squashes each
# unit independently into the range 0..1, which suits binary or multi-label
# problems. Softmax normalises the 46 outputs into a single probability
# distribution that sums to 1, which is what single-label multi-class
# classification needs.
model = models.Sequential([
  layers.Dense(64, activation='relu', input_shape=(10000,)),
  layers.Dense(64, activation='relu'),
  layers.Dense(46, activation='softmax'),
])

In [22]:
# categorical_crossentropy pairs with the one-hot labels built by
# to_categorical and with the softmax output layer.
model.compile(optimizer='rmsprop', loss='categorical_crossentropy', metrics=['accuracy'])

In [23]:
# Hold out the first 1000 training samples for validation and fit on the rest.
n_val = 1000
x_val, partial_x_train = x_train[:n_val], x_train[n_val:]
y_val, partial_y_train = y_train[:n_val], y_train[n_val:]