In [1]:
import pandas as pd
import nltk   
import matplotlib.pyplot as plt
from wordcloud import WordCloud
from nltk.stem import WordNetLemmatizer 
from nltk.corpus import stopwords
from nltk.stem import PorterStemmer
from nltk.stem import LancasterStemmer
import seaborn as sns
from sklearn.model_selection import train_test_split
import string
from sklearn.feature_extraction.text import CountVectorizer 
from sklearn.feature_extraction.text import TfidfTransformer
from sklearn.naive_bayes import MultinomialNB
from sklearn.pipeline import Pipeline
from sklearn.metrics import classification_report, confusion_matrix
from newspaper import Article
from sklearn.neural_network import MLPClassifier
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.models import Sequential
from tensorflow.keras.optimizers import SGD
import numpy as np
from hyperas import optim
from hyperas.distributions import choice, uniform
from hyperopt import Trials, STATUS_OK, tpe
import _locale
import warnings
from numba import cuda 
import time
warnings.filterwarnings('ignore')
_locale._getdefaultlocale = (lambda *args: ['en_US', 'utf8'])
%matplotlib inline

In [2]:
def data():
    
    stemmer = LancasterStemmer()

    def text_process(mess):


        stopwordList = stopwords.words('english')

        mess = [char for char in mess if char not in string.punctuation]
        mess = ''.join(mess)

        words = nltk.word_tokenize(mess)
        words = [t for t in words if t not in stopwordList]
        words = [stemmer.stem(w.lower()) for w in words]

        return words
    
    def convert_sparse_matrix_to_sparse_tensor(X):
        coo = X.tocoo()
        indices = np.mat([coo.row, coo.col]).transpose()
        return tf.SparseTensor(indices, coo.data, coo.shape)
    t0 = time.time()
    
    df = pd.read_csv("news.csv", sep = "\t")
    df["target"] = df["subject"]
    df["TitleText"] = df["title"] + " " + df["text"]
    df = df.drop(columns = ["title", "text", "subject", "date"])
    df.target = df["target"].astype("category").cat.codes
    subject = tf.keras.utils.to_categorical(df["target"].values, num_classes=8)
    text_train, text_test, y_train, y_test = train_test_split(df["TitleText"], subject, test_size=0.2, random_state=101)
    
    t1 = time.time()
    print("Time to Load and Split Data: " + str(t1-t0))
    
    bow_transformer = CountVectorizer(analyzer=text_process).fit(df["TitleText"])
    text_bow = bow_transformer.transform(df["TitleText"])
    tfidf_transformer = TfidfTransformer().fit(text_bow)
    
    t2 = time.time()
    print("Time to Fit TFIDF Model: " + str(t2-t1))
    
    train_bow = bow_transformer.transform(text_train)
    train_tfidf = tfidf_transformer.transform(train_bow)
    test_bow = bow_transformer.transform(text_test)
    test_tfidf = tfidf_transformer.transform(test_bow)
    
    t3 = time.time()
    print("Time to Vectorize train and test sets: " + str(t3-t2))
    
    train_tensor = convert_sparse_matrix_to_sparse_tensor(train_tfidf)
    x_train = tf.sparse.reorder(train_tensor)
    test_tensor = convert_sparse_matrix_to_sparse_tensor(test_tfidf)
    x_test = tf.sparse.reorder(test_tensor)
    
    t4 = time.time()
    print("Time to Convert Matrix into Tensor and Reorder: " + str(t4-t3))
    print("Time to execute: " + str(t4-t0))
    return x_train, y_train, x_test, y_test

In [3]:
def model(x_train, y_train, x_test, y_test):
    model = tf.keras.Sequential()
    size = {{choice([8,10,12,14,16,18,20,22,24,26,28,30,32])}}
    activations={{choice(['relu', 'tanh'])}}
    choiceval = {{choice(['adam', 'rmsprop'])}}
    lr = {{uniform(0.0009, 0.00225)}}
    adam = tf.keras.optimizers.Adam(lr=lr)
    rmsprop = tf.keras.optimizers.RMSprop(lr=lr)
    hidden = {{choice([2,3,4,5,6])}}
    if choiceval == 'adam':
        optim = adam
    elif choiceval == 'rmsprop':
        optim = rmsprop
    model.add(layers.Dense(size, input_dim = x_train.shape[1], activation = activations)) # input layer requires input_dim param
    for i in range(hidden-1):
        model.add(layers.Dense(size, activation = activations))
    model.add(layers.Dense(8, activation='softmax'))
    model.compile(loss="binary_crossentropy", optimizer= optim, metrics=['accuracy'])


    es = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=0.0025, patience=1, verbose=0, mode='auto')
    with tf.device('/cpu:0'):
        model.fit(x_train, y_train, epochs = 100, shuffle = True, batch_size=128, verbose=0, callbacks=[es])
        score = model.evaluate(x_test, y_test, verbose=0)
    loss = score[0]
    return {'loss': loss, 'status': STATUS_OK, 'model': model} 

## Always Load Data with CPU (Just as Fast + Doesn't take All the Vram to Load)

In [4]:
with tf.device('/cpu:0'):
    x_train, y_train, x_test, y_test = data()

Time to Load and Split Data: 1.0114963054656982
Time to Fit TFIDF Model: 344.6874997615814
Time to Vectorize train and test sets: 172.36899876594543
Time to Convert Matrix into Tensor and Reorder: 0.5100018978118896
Time to execute: 518.5779967308044


In [None]:
tf.keras.backend.clear_session()
m = tf.keras.Sequential()
activations = "tanh"
choiceval = "rmsprop"
lr = 0.0001
adam = tf.keras.optimizers.Adam(lr=lr)
rmsprop = tf.keras.optimizers.RMSprop(lr=lr)
hidden = 2
size = 256
m.add(layers.Dense(256, input_dim = x_train.shape[1], activation = activations)) # input layer requires input_dim param
m.add(layers.Dense(256, activation = activations))
m.add(layers.Dense(8, activation = activations))
m.add(layers.Dense(8, activation='softmax'))
m.summary()
m.compile(loss="categorical_crossentropy", optimizer= adam, metrics=['acc'])

es = tf.keras.callbacks.EarlyStopping(monitor='loss', min_delta=-.01, patience=5, verbose=1, mode='auto')
with tf.device('/cpu:0'):
    m.fit(x_train, y_train, epochs = 100, shuffle = True, batch_size=128, verbose=1, callbacks=[es], validation_data=(x_test,y_test))
    #score = m.evaluate(x_test, y_test, verbose=1)


Model: "sequential"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 dense (Dense)               (None, 256)               47362560  
                                                                 
 dense_1 (Dense)             (None, 256)               65792     
                                                                 
 dense_2 (Dense)             (None, 8)                 2056      
                                                                 
 dense_3 (Dense)             (None, 8)                 72        
                                                                 
Total params: 47,430,480
Trainable params: 47,430,480
Non-trainable params: 0
_________________________________________________________________
Epoch 1/100
Epoch 2/100

In [None]:
best_run, best_model = optim.minimize(model=model, data=data, algo=tpe.suggest, max_evals=500, trials=Trials(), eval_space=True, notebook_name='Optimization')

In [None]:
best_model.summary()

In [None]:
print("Evalutation of best performing model:")
print(best_model.evaluate(x_test, y_test))
print("Best performing model chosen hyper-parameters:")
print(best_run)

In [24]:
m.evaluate(x_test, y_test)



[0.4539780616760254, 0.4270787835121155, 0.7720490097999573]

In [25]:
predictions = m.predict(x_test)

In [26]:
y_predictions=np.argmax(predictions, axis=1)
tests=np.argmax(y_test, axis=1)

In [27]:
print(confusion_matrix(y_predictions, tests))
print(classification_report(y_predictions,tests))

[[   1    5    0    7   19   23    0    0]
 [   0   40    0   67    0    0    0    0]
 [   1    0 1721    0   13   42    1    0]
 [   2  118    0   75    0    0    0    0]
 [  54    4    3    4  110  465    0    0]
 [ 223    0   71    0  752  774    4   14]
 [   2    0    7    0    1   13 2277   54]
 [   7    1    2    1    7   10   50 1935]]
              precision    recall  f1-score   support

           0       0.00      0.02      0.01        55
           1       0.24      0.37      0.29       107
           2       0.95      0.97      0.96      1778
           3       0.49      0.38      0.43       195
           4       0.12      0.17      0.14       640
           5       0.58      0.42      0.49      1838
           6       0.98      0.97      0.97      2354
           7       0.97      0.96      0.96      2013

    accuracy                           0.77      8980
   macro avg       0.54      0.53      0.53      8980
weighted avg       0.80      0.77      0.78      8980

