## The Reuters dataset from the Keras datasets library is used to build a model using tf.keras


In [None]:
import tensorflow as tf
from tensorflow import keras

In [None]:
from tensorflow.keras.datasets import reuters

In [None]:
# Load the Reuters newswire data with the vocabulary capped at 10000 word
# indices, reserving 25% of the samples as the test split.
(train_data, train_labels), (test_data, test_labels) = reuters.load_data(
    num_words=10000,
    test_split=0.25,
)

##### To see how the words are encoded, we can call the built-in function get_word_index(), which returns a dictionary mapping words to integers

In [None]:
# Fetch the dictionary that maps each word to the integer used to encode it.
words = reuters.get_word_index()

In [None]:
# Show a small sample of the mapping: the first ten (word, index) pairs.
print(dict(list(words.items())[:10]))

##### We will now vectorize the word sequences using NumPy. Python's enumerate function lets us iterate over a sequence in a for loop while also keeping a counter

In [None]:
import numpy as np

In [None]:
def vectorize(data, dimensions):
    """Return a 2-D array with ones at the positions named by each entry of ``data``.

    Args:
        data: Sequence whose entries are either a single integer index or a
            list of integer indices; every index must be a valid column
            position for a row of width ``dimensions``.
        dimensions: Width of every output row.

    Returns:
        Float array of shape ``(len(data), dimensions)``. Row ``i`` holds 1.0
        at every index given by ``data[i]`` and 0.0 everywhere else.
    """
    n_rows = len(data)
    encoded = np.zeros((n_rows, dimensions))
    for row, hot_indices in enumerate(data):
        # One indexed assignment sets every listed column of this row.
        encoded[row, hot_indices] = 1.0
    return encoded

In [None]:
# Encode every word-index sequence as a 10000-wide vector of zeros and ones.
x_train = vectorize(train_data, 10000)
x_test = vectorize(test_data, 10000)

# The labels cover 46 topics, so each label is one-hot encoded into 46 dimensions.
OH_train_labels = vectorize(train_labels, 46)
OH_test_labels = vectorize(test_labels, 46)

#### Now we start building the model

In [None]:
from tensorflow.keras import layers, models

In [None]:
# Two 64-unit ReLU hidden layers feeding a 46-unit softmax output layer
# (one output per topic); the input is a 10000-wide vector.
model = models.Sequential([
    layers.Dense(64, activation='relu', input_shape=(10000,)),
    layers.Dense(64, activation='relu'),
    layers.Dense(46, activation='softmax'),
])

In [None]:
# Configure training: RMSprop optimizer, categorical cross-entropy loss
# (the labels are one-hot encoded), and accuracy as the reported metric.
model.compile(
    optimizer='rmsprop',
    loss='categorical_crossentropy',
    metrics=['accuracy'],
)

###### We set aside the first 1000 training samples for validation

In [None]:
# The first 1000 training samples (and their labels) form the validation set;
# the remainder is what the model is actually trained on.
x_val, partial_x_train = x_train[:1000], x_train[1000:]
y_val, partial_y_train = OH_train_labels[:1000], OH_train_labels[1000:]

##### Now we train the model

In [None]:
# Train for 5 epochs in mini-batches of 512, checking the held-out validation
# split after each epoch. The result is bound to `history` rather than
# `models`, so the imported `tensorflow.keras.models` module is not overwritten
# and the model-building cell can still be re-run afterwards.
history = model.fit(
    partial_x_train,
    partial_y_train,
    epochs=5,
    batch_size=512,
    validation_data=(x_val, y_val),
)

In [None]:
# Score the trained model on the vectorized test data and its one-hot labels;
# the result is kept in `r`.
r = model.evaluate(x_test,OH_test_labels)

##### We will now use the test data to make predictions. Note that the test data is used in the order it was loaded; no additional shuffling is performed here

In [None]:
# Run the trained model over the vectorized test samples and keep its outputs.
prediction = model.predict(x_test)

In [None]:
# Position of the largest output value (i.e. the predicted topic index) for
# the test sample at index 4.
np.argmax(prediction[4])