In [1]:
import pandas
import nltk
import keras

from keras.models import Sequential
from keras.layers import Dense, Activation
from numpy import array as np_array

# Verb POS tags that are counted as features; the order here fixes the
# order of the feature vector built by tensed_tag_counts. The base-form
# tag 'VB' is not part of this list.
TENSED_POS_TAGS = [
    'VBD',
    'VBG',
    'VBN',
    'VBP',
    'VBZ',
]

# Position of each stance label inside the 3-element one-hot target vector.
STANCE_INDICES = {
    'FAVOR': 0,
    'NONE': 1,
    'AGAINST': 2,
}

# Global tally of POS tag -> occurrence count, filled in by add_to_tag_set.
TAG_SET = dict()


def pos_tag(s):
    """Tokenize the string `s` with NLTK and return its POS tagging.

    The result is whatever `nltk.pos_tag` produces for the word tokens
    of `s`.
    """
    tokens = nltk.word_tokenize(s)
    return nltk.pos_tag(tokens)


def add_to_tag_set(s):
    """Add the POS tags found in one row's tweet to the global TAG_SET tally.

    `s` is indexed with the key 'Tweet'; the text found there is tokenized
    and tagged, and the count stored under each tag in TAG_SET is
    incremented once per tagged token. Nothing is returned.
    """
    tagged_tokens = nltk.pos_tag(nltk.word_tokenize(s['Tweet']))
    for tagged in tagged_tokens:
        label = tagged[1]
        # First sighting of a tag starts its count at 1.
        TAG_SET[label] = TAG_SET.get(label, 0) + 1


def get_tag_set(rows):
    """Update the global TAG_SET tally with the tags of every row in `rows`.

    Called purely for its side effect on TAG_SET; returns None.
    """
    for row in rows:
        add_to_tag_set(row)


def tag_is_tensed(tag):
    """Return True when a tagged token carries one of the tensed verb tags.

    Parameters:
        tag: a (token, pos_tag) pair as produced by `pos_tag`; only the
            second element is inspected.

    Returns:
        bool: whether the POS tag is listed in TENSED_POS_TAGS.
    """
    # Check against the fixed list of tensed verb tags, not TAG_SET:
    # TAG_SET holds every tag observed so far, so testing membership in it
    # would accept practically every token and filter nothing.
    return tag[1] in TENSED_POS_TAGS


def get_target_rows(d_frame, target):
    """Return the rows of `d_frame` as a list, optionally filtered by target.

    When `target` is the string 'ALL' every row is returned. Otherwise only
    the rows whose 'Target' column equals `target` are kept. Rows are the
    per-row objects yielded by `DataFrame.iterrows()`, in frame order.
    """
    all_rows = [row for _index, row in d_frame.iterrows()]
    if target == 'ALL':
        return all_rows
    return [row for row in all_rows if row['Target'] == target]


def get_tensed_tagged_words(rows):
    """For each row, tag its tweet and keep the pairs accepted by tag_is_tensed.

    Returns a list with one entry per row; each entry is the list of
    (token, tag) pairs from that row's 'Tweet' text that pass the filter.
    """
    kept_per_row = []
    for row in rows:
        tagged = pos_tag(row['Tweet'])
        kept_per_row.append([pair for pair in tagged if tag_is_tensed(pair)])
    return kept_per_row


def tensed_tag_counts(tags):
    """Count how often each tag in TENSED_POS_TAGS occurs among `tags`.

    `tags` is a sequence of (token, tag) pairs. The result is a list of
    integers aligned with TENSED_POS_TAGS, one count per listed tag.
    """
    labels = [pair[1] for pair in tags]
    return [labels.count(wanted) for wanted in TENSED_POS_TAGS]


def get_x_train(tags_for_tweets):
    """Build one tag-count feature vector per tweet.

    `tags_for_tweets` holds, for every tweet, its list of (token, tag)
    pairs; each is converted with tensed_tag_counts.
    """
    return [tensed_tag_counts(tweet_tags) for tweet_tags in tags_for_tweets]


def onehot_for_stance(stance):
    """Encode a stance label as a 3-element one-hot list.

    The position set to 1 is looked up in STANCE_INDICES; a label that is
    not a key of that mapping raises KeyError.
    """
    vector = [0] * 3
    hot_position = STANCE_INDICES[stance]
    vector[hot_position] = 1
    return vector


def get_y_train(rows):
    """Return the one-hot encoded 'Stance' value of every row, in order."""
    return [onehot_for_stance(row['Stance']) for row in rows]


def get_xy_data(filename, target):
    """Load a tab-separated data file and turn it into (features, labels).

    Parameters:
        filename: path of a tab-separated file read with latin1 encoding.
        target: value of the 'Target' column to keep, or 'ALL' for every row.

    Returns:
        A tuple (x, y) of plain Python lists: x holds one tag-count vector
        per selected row, y the matching one-hot stance vectors.

    Side effect: the tags of the selected rows are added to the global
    TAG_SET tally, so repeated calls accumulate counts.
    """
    frame = pandas.read_csv(filename, sep='\t', encoding='latin1')
    selected_rows = get_target_rows(frame, target)

    # Update the global tag tally before building the features.
    get_tag_set(selected_rows)

    features = get_x_train(get_tensed_tagged_words(selected_rows))
    targets = get_y_train(selected_rows)
    return (features, targets)

Using TensorFlow backend.


# Training the model

In [2]:
# Short aliases for the `target` argument of get_xy_data. 'ALL' selects
# every row; the other values are matched against the 'Target' column.
labels = dict(
    ALL='ALL',
    HILLARY='Hillary Clinton',
    ABORTION='Legalization of Abortion',
)

# Both calls also add to the global TAG_SET tally as a side effect.
x_train, y_train = get_xy_data('trainingdata.txt', labels['ALL'])
x_test, y_test = get_xy_data('trialdata.txt', labels['ALL'])

# LEARN
# Feed-forward classifier: one input per entry of TENSED_POS_TAGS, two
# ReLU hidden layers (8 and 16 units), and a 3-way softmax output.
model = Sequential([
    Dense(units=8, input_dim=len(TENSED_POS_TAGS)),
    Activation('relu'),
    Dense(units=16),
    Activation('relu'),
    Dense(units=3),
    Activation('softmax'),
])

# Compilation configures training (loss, optimizer, reported metrics);
# it does not add a layer to the network.
sgd = keras.optimizers.SGD(lr=0.02, momentum=0.9, nesterov=True)
model.compile(loss='categorical_crossentropy',
              optimizer=sgd,
              metrics=['accuracy', 'mae'])

# Show the tag tally accumulated while loading the data.
print(TAG_SET)

{'WRB': 388, 'DT': 3371, 'FW': 17, 'VBD': 681, ')': 97, 'RP': 175, 'RBS': 37, '#': 6540, 'NNS': 2445, 'IN': 4387, 'NNPS': 91, 'CC': 1184, 'LS': 2, 'UH': 36, 'MD': 790, 'VB': 2546, 'TO': 1229, '.': 3495, 'JJ': 4296, 'CD': 605, 'WDT': 115, '$': 36, ':': 664, '(': 75, '``': 237, 'WP$': 5, 'NN': 7469, 'PDT': 35, 'VBP': 2018, 'JJR': 136, 'VBN': 698, 'NNP': 10429, 'POS': 265, "''": 254, 'JJS': 118, 'RBR': 63, 'SYM': 4, 'VBG': 1073, 'WP': 336, 'VBZ': 1790, ',': 1179, 'RB': 2643, 'PRP$': 843, 'PRP': 2989, 'EX': 79}


In [1]:
# This cell relies on `model`, `x_train`, `y_train`, `x_test` and `y_test`
# created by the earlier cells; on a fresh kernel run those cells first,
# otherwise the names are undefined (NameError).
model.fit(np_array(x_train), np_array(y_train), epochs=30, batch_size=32)

# Convert the held-out data to arrays exactly as for fit(): the data
# helpers return plain Python lists, and Keras interprets a list as a
# collection of separate input arrays rather than one 2-D batch.
loss_and_metrics = model.evaluate(
    np_array(x_test), np_array(y_test), batch_size=128)
print(loss_and_metrics)

NameError: name 'model' is not defined