# Downloading a dataset

In [1]:
from sklearn.datasets import fetch_20newsgroups
newsgroups_train = fetch_20newsgroups(subset='train')
newsgroups_test = fetch_20newsgroups(subset='test')

Downloading 20news dataset. This may take a few minutes.
Downloading dataset from https://ndownloader.figshare.com/files/5975967 (14 MB)


# Preprocessing

In [79]:
import keras.preprocessing.text as kpt

max_words = 1000
num_classes = 20

tokenizer = kpt.Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(newsgroups_train["data"])  

x_train = tokenizer.texts_to_matrix(newsgroups_train["data"], mode='binary') 
x_test = tokenizer.texts_to_matrix(newsgroups_test["data"], mode='binary') 

y_train = keras.utils.to_categorical(newsgroups_train["target"], num_classes) 
y_test = keras.utils.to_categorical(newsgroups_test["target"], num_classes)


# Making model

In [80]:
from keras.models import Sequential
from keras.layers.core import Dense, Activation, Dropout

model = Sequential([
          Dense(512, input_shape=(max_words,)),
          Activation('relu'),
          Dropout(0.5),
          Dense(num_classes),
          Activation('softmax')
        ])

# print(model.to_yaml())
# print(model.to_json())

# Preparing model 

In [81]:
model.compile(loss='categorical_crossentropy',
              optimizer='adam',
              metrics=['accuracy'])

# Learning

In [82]:
from keras.callbacks import EarlyStopping  

early_stopping=EarlyStopping(monitor='loss')  
batch_size = 100
epochs = 50

history = model.fit(x_train, y_train,
                    batch_size=batch_size,
                    epochs=epochs,
                    verbose=1,
                    validation_split=0.1,
                    callbacks=[early_stopping])

Train on 10182 samples, validate on 1132 samples
Epoch 1/50
Epoch 2/50
Epoch 3/50
Epoch 4/50
Epoch 5/50
Epoch 6/50
Epoch 7/50
Epoch 8/50
Epoch 9/50
Epoch 10/50
Epoch 11/50
Epoch 12/50
Epoch 13/50
Epoch 14/50
Epoch 15/50
Epoch 16/50
Epoch 17/50
Epoch 18/50
Epoch 19/50
Epoch 20/50
Epoch 21/50
Epoch 22/50
Epoch 23/50
Epoch 24/50
Epoch 25/50
Epoch 26/50
Epoch 27/50
Epoch 28/50
Epoch 29/50
Epoch 30/50


# Testing

In [83]:
score = model.evaluate(x_test, y_test, batch_size=batch_size)



In [84]:
print(score)

[1.7281236063592054, 0.651752520924354]
