Basic Neural Network model to classify disaster tweets for Kaggle competition. 

1 Building the neural net

In [17]:
import numpy as np
import pandas as pd
import math

In [23]:
# supporting functions
def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through ``np.logaddexp``
    so that large negative inputs do not overflow ``np.exp`` (the direct
    formula triggers a RuntimeWarning there).

    Parameters
    ----------
    x : scalar or numpy array
        Pre-activation value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(1 + exp(-x)), evaluated without overflow
    return np.exp(-np.logaddexp(0.0, -x))

def sigmoid_derivative(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    ``x`` is the already-activated value s = sigmoid(z), not the raw input z;
    for such a value the slope is s * (1 - s). Works element-wise on arrays.
    """
    complement = 1.0 - x
    return x * complement

# 2 layer nn definition
class NeuralNetwork:
    """Minimal fully connected network: input -> one hidden sigmoid layer -> one sigmoid output.

    The network has two weight matrices and no bias terms. It is trained by
    full-batch gradient descent on the sum-of-squares loss: every call to
    ``backprop`` uses all rows of the stored input.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input layer values.
    y : array-like, shape (n_samples, 1) or (n_samples,)
        Target values; a 1-D vector is reshaped to a single column.
    hidden_units : int, default 4
        Number of units in the hidden layer.
    learning_rate : float, default 1.0
        Factor applied to the gradient step (1.0 applies the raw gradient).
    seed : int or None, default None
        When given, the initial weights are drawn from a dedicated
        ``RandomState(seed)``; when None the global ``np.random`` state is used.
    """

    def __init__(self, x, y, hidden_units=4, learning_rate=1.0, seed=None):
        # an input layer, x
        self.input = np.asarray(x)

        # targets must be a column so that (y - output) stays (n_samples, 1)
        # instead of broadcasting to (n_samples, n_samples)
        target = np.asarray(y)
        if target.ndim == 1:
            target = target.reshape(-1, 1)
        self.y = target

        self.learning_rate = learning_rate

        # weights are uniform in [0, 1); biases are left out for simplicity
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights1 = rng.rand(self.input.shape[1], hidden_units)
        self.weights2 = rng.rand(hidden_units, 1)

        # an output layer, y hat, filled in by feedforward()
        self.output = np.zeros(self.y.shape)

    def feedforward(self):
        """Compute the hidden activations and the predicted output y hat."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def backprop(self):
        """Take one gradient step on both weight matrices.

        Must be called after ``feedforward`` because it reads ``self.layer1``
        and ``self.output``.
        """
        # chain rule: error signal at the output, then pushed back to the hidden layer
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # (y - output) already carries the minus sign of the loss gradient,
        # so adding these terms moves the weights downhill
        self.weights1 += self.learning_rate * d_weights1
        self.weights2 += self.learning_rate * d_weights2

In [22]:
import numpy as np

def sigmoid(x):
    """Logistic sigmoid 1 / (1 + exp(-x)), applied element-wise.

    The value is computed as exp(-log(1 + exp(-x))) through ``np.logaddexp``
    so that large negative inputs do not overflow ``np.exp`` (the direct
    formula triggers a RuntimeWarning there).

    Parameters
    ----------
    x : scalar or numpy array
        Pre-activation value(s).

    Returns
    -------
    Value(s) in [0, 1] with the same shape as ``x``.
    """
    # logaddexp(0, -x) == log(1 + exp(-x)), evaluated without overflow
    return np.exp(-np.logaddexp(0.0, -x))

def sigmoid_derivative(x):
    """Derivative of the sigmoid, written in terms of the sigmoid's output.

    ``x`` is the already-activated value s = sigmoid(z), not the raw input z;
    for such a value the slope is s * (1 - s). Works element-wise on arrays.
    """
    complement = 1.0 - x
    return x * complement

class NeuralNetwork:
    """Minimal fully connected network: input -> one hidden sigmoid layer -> one sigmoid output.

    The network has two weight matrices and no bias terms. It is trained by
    full-batch gradient descent on the sum-of-squares loss: every call to
    ``backprop`` uses all rows of the stored input.

    Parameters
    ----------
    x : array-like, shape (n_samples, n_features)
        Input layer values.
    y : array-like, shape (n_samples, 1) or (n_samples,)
        Target values; a 1-D vector is reshaped to a single column.
    hidden_units : int, default 4
        Number of units in the hidden layer.
    learning_rate : float, default 1.0
        Factor applied to the gradient step (1.0 applies the raw gradient).
    seed : int or None, default None
        When given, the initial weights are drawn from a dedicated
        ``RandomState(seed)``; when None the global ``np.random`` state is used.
    """

    def __init__(self, x, y, hidden_units=4, learning_rate=1.0, seed=None):
        self.input = np.asarray(x)

        # targets must be a column so that (y - output) stays (n_samples, 1)
        # instead of broadcasting to (n_samples, n_samples)
        target = np.asarray(y)
        if target.ndim == 1:
            target = target.reshape(-1, 1)
        self.y = target

        self.learning_rate = learning_rate

        # weights are uniform in [0, 1); the network has no bias terms
        rng = np.random if seed is None else np.random.RandomState(seed)
        self.weights1 = rng.rand(self.input.shape[1], hidden_units)
        self.weights2 = rng.rand(hidden_units, 1)

        # predicted output, filled in by feedforward()
        self.output = np.zeros(self.y.shape)

    def feedforward(self):
        """Compute the hidden activations and the predicted output y hat."""
        self.layer1 = sigmoid(np.dot(self.input, self.weights1))
        self.output = sigmoid(np.dot(self.layer1, self.weights2))

    def backprop(self):
        """Take one gradient step on both weight matrices.

        Must be called after ``feedforward`` because it reads ``self.layer1``
        and ``self.output``.
        """
        # chain rule: error signal at the output, then pushed back to the hidden layer
        output_delta = 2 * (self.y - self.output) * sigmoid_derivative(self.output)
        hidden_delta = np.dot(output_delta, self.weights2.T) * sigmoid_derivative(self.layer1)

        d_weights2 = np.dot(self.layer1.T, output_delta)
        d_weights1 = np.dot(self.input.T, hidden_delta)

        # (y - output) already carries the minus sign of the loss gradient,
        # so adding these terms moves the weights downhill
        self.weights1 += self.learning_rate * d_weights1
        self.weights2 += self.learning_rate * d_weights2

[[0.01153772]
 [0.97014246]
 [0.96935826]
 [0.0370082 ]]


In [42]:
# example application
if __name__ == "__main__":
    # seed the global RNG so the random initial weights, and therefore the
    # printed results, are the same on every Restart & Run All
    np.random.seed(42)

    # toy problem: the target is the XOR of the first two columns;
    # the third column is a constant 1 (presumably standing in for a bias input)
    X = np.array([[0,0,1],
                  [0,1,1],
                  [1,0,1],
                  [1,1,1]])
    y = np.array([[0],[1],[1],[0]])
    nn = NeuralNetwork(X,y)

    # 1500 full-batch training steps
    for _ in range(1500):
        nn.feedforward()
        nn.backprop()

    print("predictions: ",nn.output)
    # mean squared error over the samples (one value per output column)
    print("average error: ",np.mean((nn.output - y)**2, axis=0))

predictions:  [[0.01429117]
 [0.98055405]
 [0.97481588]
 [0.02574103]]
average error:  [0.00046981]


2 Kaggle competition

In [121]:
# load the competition training set; only the id, text and target columns are read.
# forward slashes keep this relative path valid on Linux/macOS as well as Windows
tweets = pd.read_csv('nlp-getting-started/train.csv', usecols = ['id','text','target'])
# keep the ids aside so predictions can be matched back to tweets later
ids = tweets['id']
# working frame: label first, then the raw tweet text
tweets = tweets[['target','text']]

In [62]:
# show the full tweet text instead of truncating long cells
# (None means "no limit"; the -1 sentinel is deprecated since pandas 1.0)
pd.set_option('display.max_colwidth', None)
# looks like some text cleaning will be necessary; keep it naive for now, but some ideas for later:
# remove special characters, punctuation, hashtags
# fix spelling errors, remove extra letters
tweets[['target','text']].head(20)

Unnamed: 0,target,text
0,1,Our Deeds are the Reason of this #earthquake May ALLAH Forgive us all
1,1,Forest fire near La Ronge Sask. Canada
2,1,All residents asked to 'shelter in place' are being notified by officers. No other evacuation or shelter in place orders are expected
3,1,"13,000 people receive #wildfires evacuation orders in California"
4,1,Just got sent this photo from Ruby #Alaska as smoke from #wildfires pours into a school
5,1,#RockyFire Update => California Hwy. 20 closed in both directions due to Lake County fire - #CAfire #wildfires
6,1,"#flood #disaster Heavy rain causes flash flooding of streets in Manitou, Colorado Springs areas"
7,1,I'm on top of the hill and I can see a fire in the woods...
8,1,There's an emergency evacuation happening now in the building across the street
9,1,I'm afraid that the tornado is coming to our area...


In [109]:
# raw tweet strings, one entry per training example
train_x = np.asarray(tweets['text'])
# target labels copied into a column vector with one row per tweet
train_y = np.array(tweets['target']).reshape(-1, 1)

In [110]:
# time to get serious
import json
import keras
import keras.preprocessing.text as kpt
from keras.preprocessing.text import Tokenizer

In [111]:
# vocabulary cap: restrict vectorization to the 3000 most popular words in the dataset
max_words = 3000

# build the tokenizer and let it learn the vocabulary from the raw tweets
tokenizer = Tokenizer(num_words=max_words)
tokenizer.fit_on_texts(train_x)

# word -> integer ID lookup collected by the tokenizer
dictionary = tokenizer.word_index

# persist the lookup so the same IDs can be reused later
with open('dictionary.json', 'w') as dictionary_file:
    dictionary_file.write(json.dumps(dictionary))


def convert_text_to_index_array(text):
    """Map one tweet to the list of integer IDs of its words.

    ``kpt.text_to_word_sequence`` splits the text into word tokens; it does
    not pad or equalise lengths, so the result has one entry per known word
    of the tweet. Each token is looked up in the tokenizer's ``dictionary``.
    Tokens that are missing from the dictionary (words never seen when the
    tokenizer was fitted) are skipped instead of raising ``KeyError``, so the
    function can also be applied to text outside the training set.
    """
    words = kpt.text_to_word_sequence(text)
    return [dictionary[word] for word in words if word in dictionary]

# for each tweet, change each token to its ID in the Tokenizer's word_index
allWordIndices = [convert_text_to_index_array(text) for text in train_x]

# now we have a list of all tweets converted to index lists.
# tweets differ in word count, so the rows are ragged: the object dtype must be
# requested explicitly, because recent NumPy refuses to build a ragged array implicitly
allWordIndices = np.asarray(allWordIndices, dtype=object)

# turn every tweet into a fixed-width 0/1 row: a column is 1 when that word ID occurs in the tweet
# NOTE: this overwrites train_x (raw text -> matrix), so the tokenizer cells cannot be
# re-run without first re-creating train_x from the tweets
train_x = tokenizer.sequences_to_matrix(allWordIndices, mode='binary')

In [113]:
# create our first neural network!
if __name__ == "__main__":
    # seed the global RNG so the random initial weights, and therefore the
    # reported error, are reproducible across runs
    np.random.seed(42)

    X = train_x
    y = train_y
    nn = NeuralNetwork(X,y)

    # 1500 full-batch training steps on the whole training set
    for _ in range(1500):
        nn.feedforward()
        nn.backprop()

    # the recorded cell output shows the predictions, so print them here as well
    print("predictions: ",nn.output)
    # mean squared error on the training data (one value per output column)
    print("average error: ",np.mean((nn.output - y)**2, axis=0))

predictions:  [[0.]
 [0.]
 [0.]
 ...
 [0.]
 [0.]
 [0.]]
average error:  [0.42965979]


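Looks like our model does a terrible job: it outputs (essentially) 0 for every tweet, so the mean squared error of about 0.43 is simply the share of tweets labelled 1 — nearly 1/2 of the predictions are wrong!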

There's lots of room for improvement, including:
    1 raising the max word limit to > 3000
    2 cleaning the raw tweets, see above for ideas
    3 running more iterations (not likely to do much unless we do steps 1 and 2 first)

In [126]:
# round every sigmoid output to a hard 0/1 label; done vectorised so that no
# one-element array is passed through int() (deprecated in recent NumPy)
pred = nn.output.round().astype(int).ravel().tolist()
# pair each prediction with the id of the tweet it belongs to
train_1 = pd.DataFrame({"id":ids,"target":pred})

In [100]:
from keras.models import Sequential
from keras.layers import Dense, Dropout, Activation

# encode the labels as two class columns (one column per category)
# NOTE: overwrites train_y, so this cell is not safe to run twice in a row
train_y = keras.utils.to_categorical(train_y, 2)

# feed-forward classifier: two hidden Dense layers, each followed by 50% dropout,
# ending in a 2-way softmax over the label categories
model = Sequential([
    Dense(512, input_shape=(max_words,), activation='relu'),
    Dropout(0.5),
    Dense(256, activation='sigmoid'),
    Dropout(0.5),
    Dense(2, activation='softmax'),
])





Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


In [101]:
# inspect the binary word-occurrence matrix built by tokenizer.sequences_to_matrix
train_x

array([[0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       ...,
       [0., 1., 1., ..., 0., 0., 0.],
       [0., 0., 0., ..., 0., 0., 0.],
       [0., 1., 1., ..., 0., 0., 0.]])

In [102]:
# inspect the labels after keras.utils.to_categorical (two columns per row)
train_y

array([[0., 1.],
       [0., 1.],
       [0., 1.],
       ...,
       [0., 1.],
       [0., 1.],
       [0., 1.]], dtype=float32)