## Data Preprocessing

In [27]:
import pandas as pd
import re

df = pd.read_csv('BIG_DATA.txt')
df = df.drop(['Unnamed: 0'], axis=1)
print(df.head())

         Date                                               text ticker signal
0  2015-01-02  success apple pay part determined lies hands p...   AAPL   down
1  2015-01-05        time take note lies top fashion agenda 2015   AAPL   down
2  2015-01-08  google app store largest world still makes far...   AAPL     up
3  2015-01-09  facebook eventually reverts top stories view n...   AAPL     up
4  2015-01-14  planned bill would expand new york definition ...   AAPL   down


## BERT Model (a majority of this code is copied from the BERT Tutorial)

In [28]:
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
from tensorflow import keras
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
import os
import re
import pandas as pd
import numpy as np

# df = pd.read_csv('texts_and_fin2.csv')
print(df.head())

         Date                                               text ticker signal
0  2015-01-02  success apple pay part determined lies hands p...   AAPL   down
1  2015-01-05        time take note lies top fashion agenda 2015   AAPL   down
2  2015-01-08  google app store largest world still makes far...   AAPL     up
3  2015-01-09  facebook eventually reverts top stories view n...   AAPL     up
4  2015-01-14  planned bill would expand new york definition ...   AAPL   down


In [29]:
# Run this cell for a function for oversampling

def oversample(X,y):
    # Get number of rows with imbalanced class
    target = y.sum().idxmax()
    n = y[target].sum()
    # identify imbalanced targets
    imbalanced = y.drop(target,axis=1)
    #For each target, create a dataframe of randomly sampled rows, append to list
    append_list =  [y.loc[y[col]==1].sample(n=n-y[col].sum(),replace=True,random_state=20) for col in imbalanced.columns]
    append_list.append(y)
    y = pd.concat(append_list,axis=0)
    # match y indexes on other inputs
    X = X.loc[y.index]
    assert (y.index.all() == X.index.all())
    return X, y

df = df.rename(columns = {"text": "filtered_text"})
df = df.sort_values(by='Date', ascending=True, axis=0)
testNum = int(len(df) * -.1)
X_train = df['filtered_text'][:testNum].dropna()
y_train = pd.get_dummies(columns=['signal'],data=df['signal'])[:testNum].dropna().iloc[:, :]
test = df.loc[list(set(list(df.index)) - set(list(X_train.index)))]
X_test = test['filtered_text'].dropna()
y_test = test['signal'].dropna()

X_train, y_train = oversample(X_train, y_train)
# print(len(y_train['stay']))
# print(len(y_train['down']))
# print(len(y_train['up']))

# Recreate the signal variable
y_train["signal"] = np.nan

# for i, y in y_train.iterrows():
#     if str(type(y_train.loc[i])) == "<class 'pandas.core.frame.DataFrame'>": # If an index only has one observation, it draws up an error if we try to use the indexer agaon
#         # They're usually classed as a series while the ones with many observations are considered a df. This is a way to get
#         # rid of them
#         if y_train.loc[i].iloc[0, 0] == 1:
#             y_train.loc[i, "signal"] = "down"
#         elif y_train.loc[i].iloc[0, 1] == 1:
#             y_train.loc[i, "signal"] = "stay"
#         else:
#             y_train.loc[i, "signal"] = "up"
#     else: # If they only have one observation, we settle it here instead
#         if y_train.loc[i][0] == 1:
#               y_train.loc[i, "signal"] = "down"
#         elif y_train.loc[i][1] == 1:
#             y_train.loc[i, "signal"] = "stay"
#         else:
#             y_train.loc[i, "signal"] = "up"
            
X_train2 = X_train.reset_index(drop = True)
y_train2 = y_train['signal'].reset_index(drop = True)

data = pd.concat([X_train2, y_train2], axis = 1)
data.rename(columns = {"filtered_text":"doc"}, inplace = True)

X_test = X_test.reset_index(drop = True)
y_test = y_test.reset_index(drop = True)



train = data
test = pd.concat([X_test, y_test], axis = 1) 
test.rename(columns = {"filtered_text":"doc"}, inplace = True)
print(test.signal.str.count('stay').sum())
print(test.signal.str.count('up').sum())
print(test.signal.str.count('down').sum())

132
88
113


# BERT Model

In [15]:
DATA_COLUMN = 'doc'
LABEL_COLUMN = 'signal'
# label_list is the list of labels
label_list = ['up', 'down', 'stay']


train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)


BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"

def create_tokenizer_from_hub_module():
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])
      
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()

# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0327 22:33:23.839895 4711937472 saver.py:1483] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Writing example 0 of 681


I0327 22:33:24.281285 4711937472 run_classifier.py:774] Writing example 0 of 681


INFO:tensorflow:*** Example ***


I0327 22:33:24.289957 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:24.291429 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] twitter t ##wt ##r suffers today apple aa ##pl party ##ing stock tops price tag , penny ##stock research uh ##al tr ##v ch ##k aa ##pl wealth ##mana ##gement , option millionaire ##s jimmy ##bo ##b aa ##pl pm , aa ##pl everyone continues estimate basically telling us continue buy long ##fr ##om cc , samsung in ##fr ##inge first stall long possible strategies nothing new day aa ##pl , swing trading approach free ##s watching markets live web ##ina ##r sunday aa ##pl goo ##g , ali ##ba ##ba compares major tech companies aa ##pl goo ##g ms ##ft , ha ##r considering investing cm ##i cbs ##h aa ##pl view , aa ##pl stock target achieved gain sell med ##elli ##n aa ##pl [SEP]


I0327 22:33:24.292907 4711937472 run_classifier.py:464] tokens: [CLS] twitter t ##wt ##r suffers today apple aa ##pl party ##ing stock tops price tag , penny ##stock research uh ##al tr ##v ch ##k aa ##pl wealth ##mana ##gement , option millionaire ##s jimmy ##bo ##b aa ##pl pm , aa ##pl everyone continues estimate basically telling us continue buy long ##fr ##om cc , samsung in ##fr ##inge first stall long possible strategies nothing new day aa ##pl , swing trading approach free ##s watching markets live web ##ina ##r sunday aa ##pl goo ##g , ali ##ba ##ba compares major tech companies aa ##pl goo ##g ms ##ft , ha ##r considering investing cm ##i cbs ##h aa ##pl view , aa ##pl stock target achieved gain sell med ##elli ##n aa ##pl [SEP]


INFO:tensorflow:input_ids: 101 10474 1056 26677 2099 17567 2651 6207 9779 24759 2283 2075 4518 13284 3976 6415 1010 10647 14758 2470 7910 2389 19817 2615 10381 2243 9779 24759 7177 24805 20511 1010 5724 19965 2015 5261 5092 2497 9779 24759 7610 1010 9779 24759 3071 4247 10197 10468 4129 2149 3613 4965 2146 19699 5358 10507 1010 19102 1999 19699 23496 2034 13498 2146 2825 9942 2498 2047 2154 9779 24759 1010 7370 6202 3921 2489 2015 3666 6089 2444 4773 3981 2099 4465 9779 24759 27571 2290 1010 4862 3676 3676 22963 2350 6627 3316 9779 24759 27571 2290 5796 6199 1010 5292 2099 6195 19920 4642 2072 6568 2232 9779 24759 3193 1010 9779 24759 4518 4539 4719 5114 5271 19960 13348 2078 9779 24759 102


I0327 22:33:24.294041 4711937472 run_classifier.py:465] input_ids: 101 10474 1056 26677 2099 17567 2651 6207 9779 24759 2283 2075 4518 13284 3976 6415 1010 10647 14758 2470 7910 2389 19817 2615 10381 2243 9779 24759 7177 24805 20511 1010 5724 19965 2015 5261 5092 2497 9779 24759 7610 1010 9779 24759 3071 4247 10197 10468 4129 2149 3613 4965 2146 19699 5358 10507 1010 19102 1999 19699 23496 2034 13498 2146 2825 9942 2498 2047 2154 9779 24759 1010 7370 6202 3921 2489 2015 3666 6089 2444 4773 3981 2099 4465 9779 24759 27571 2290 1010 4862 3676 3676 22963 2350 6627 3316 9779 24759 27571 2290 5796 6199 1010 5292 2099 6195 19920 4642 2072 6568 2232 9779 24759 3193 1010 9779 24759 4518 4539 4719 5114 5271 19960 13348 2078 9779 24759 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:24.295430 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:24.296856 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0327 22:33:24.298305 4711937472 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0327 22:33:24.308400 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:24.309595 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] monday market outlook aa ##pl trades since june ideas bt ##u ch ##k y ##ho ##o db ##a ho ##v ir ##bt h , monday market outlook aa ##pl trades since june ideas bt ##u ch ##k y ##ho ##o db ##a ho ##v ir ##bt h , monday market outlook aa ##pl trades since june ideas bt ##u ch ##k y ##ho ##o db ##a ho ##v ir ##bt h , aa ##pl gt ##at , week left sign online wealth management co ##use w ucla ##ex ##tension investing spy baba aa ##pl , apple technical ##s monthly weekly daily fi ##bon ##ac ##ci levels update aa ##pl u ##b u ##b u ##b aa ##pl , aa ##pl apple record setting launch analysts predict [SEP]


I0327 22:33:24.310711 4711937472 run_classifier.py:464] tokens: [CLS] monday market outlook aa ##pl trades since june ideas bt ##u ch ##k y ##ho ##o db ##a ho ##v ir ##bt h , monday market outlook aa ##pl trades since june ideas bt ##u ch ##k y ##ho ##o db ##a ho ##v ir ##bt h , monday market outlook aa ##pl trades since june ideas bt ##u ch ##k y ##ho ##o db ##a ho ##v ir ##bt h , aa ##pl gt ##at , week left sign online wealth management co ##use w ucla ##ex ##tension investing spy baba aa ##pl , apple technical ##s monthly weekly daily fi ##bon ##ac ##ci levels update aa ##pl u ##b u ##b u ##b aa ##pl , aa ##pl apple record setting launch analysts predict [SEP]


INFO:tensorflow:input_ids: 101 6928 3006 17680 9779 24759 14279 2144 2238 4784 18411 2226 10381 2243 1061 6806 2080 16962 2050 7570 2615 20868 19279 1044 1010 6928 3006 17680 9779 24759 14279 2144 2238 4784 18411 2226 10381 2243 1061 6806 2080 16962 2050 7570 2615 20868 19279 1044 1010 6928 3006 17680 9779 24759 14279 2144 2238 4784 18411 2226 10381 2243 1061 6806 2080 16962 2050 7570 2615 20868 19279 1044 1010 9779 24759 14181 4017 1010 2733 2187 3696 3784 7177 2968 2522 8557 1059 12389 10288 29048 19920 8645 14208 9779 24759 1010 6207 4087 2015 7058 4882 3679 10882 11735 6305 6895 3798 10651 9779 24759 1057 2497 1057 2497 1057 2497 9779 24759 1010 9779 24759 6207 2501 4292 4888 18288 16014 102


I0327 22:33:24.312602 4711937472 run_classifier.py:465] input_ids: 101 6928 3006 17680 9779 24759 14279 2144 2238 4784 18411 2226 10381 2243 1061 6806 2080 16962 2050 7570 2615 20868 19279 1044 1010 6928 3006 17680 9779 24759 14279 2144 2238 4784 18411 2226 10381 2243 1061 6806 2080 16962 2050 7570 2615 20868 19279 1044 1010 6928 3006 17680 9779 24759 14279 2144 2238 4784 18411 2226 10381 2243 1061 6806 2080 16962 2050 7570 2615 20868 19279 1044 1010 9779 24759 14181 4017 1010 2733 2187 3696 3784 7177 2968 2522 8557 1059 12389 10288 29048 19920 8645 14208 9779 24759 1010 6207 4087 2015 7058 4882 3679 10882 11735 6305 6895 3798 10651 9779 24759 1057 2497 1057 2497 1057 2497 9779 24759 1010 9779 24759 6207 2501 4292 4888 18288 16014 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:24.314202 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:24.315379 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0327 22:33:24.317071 4711937472 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0327 22:33:24.322412 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:24.323896 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] topping popping market true value ts ##la y ##ho ##o pc ##ln nfl ##x aa ##pl q ##q ##q it ##m ##n investing , make vega friend lesson portfolio management spy ru ##t es _ f aa ##pl , topping popping market true value ts ##la y ##ho ##o pc ##ln nfl ##x aa ##pl q ##q ##q it ##m ##n investing , aa ##pl companies get involved politics stock stocks stock ##act ##ion , aa ##pl sprint mv ##no ting add support iphone usage based plans next mac , apple issued security fix mac ##s install software update yet aa ##pl , new galaxy ##s fails impress mw ##c ss ##nl ##f s ##ne aa ##pl l ##ng ##vy , topping popping market true value [SEP]


I0327 22:33:24.325591 4711937472 run_classifier.py:464] tokens: [CLS] topping popping market true value ts ##la y ##ho ##o pc ##ln nfl ##x aa ##pl q ##q ##q it ##m ##n investing , make vega friend lesson portfolio management spy ru ##t es _ f aa ##pl , topping popping market true value ts ##la y ##ho ##o pc ##ln nfl ##x aa ##pl q ##q ##q it ##m ##n investing , aa ##pl companies get involved politics stock stocks stock ##act ##ion , aa ##pl sprint mv ##no ting add support iphone usage based plans next mac , apple issued security fix mac ##s install software update yet aa ##pl , new galaxy ##s fails impress mw ##c ss ##nl ##f s ##ne aa ##pl l ##ng ##vy , topping popping market true value [SEP]


INFO:tensorflow:input_ids: 101 22286 20095 3006 2995 3643 24529 2721 1061 6806 2080 7473 19666 5088 2595 9779 24759 1053 4160 4160 2009 2213 2078 19920 1010 2191 15942 2767 10800 11103 2968 8645 21766 2102 9686 1035 1042 9779 24759 1010 22286 20095 3006 2995 3643 24529 2721 1061 6806 2080 7473 19666 5088 2595 9779 24759 1053 4160 4160 2009 2213 2078 19920 1010 9779 24759 3316 2131 2920 4331 4518 15768 4518 18908 3258 1010 9779 24759 9043 19842 3630 28642 5587 2490 18059 8192 2241 3488 2279 6097 1010 6207 3843 3036 8081 6097 2015 16500 4007 10651 2664 9779 24759 1010 2047 9088 2015 11896 17894 12464 2278 7020 20554 2546 1055 2638 9779 24759 1048 3070 10736 1010 22286 20095 3006 2995 3643 102


I0327 22:33:24.326750 4711937472 run_classifier.py:465] input_ids: 101 22286 20095 3006 2995 3643 24529 2721 1061 6806 2080 7473 19666 5088 2595 9779 24759 1053 4160 4160 2009 2213 2078 19920 1010 2191 15942 2767 10800 11103 2968 8645 21766 2102 9686 1035 1042 9779 24759 1010 22286 20095 3006 2995 3643 24529 2721 1061 6806 2080 7473 19666 5088 2595 9779 24759 1053 4160 4160 2009 2213 2078 19920 1010 9779 24759 3316 2131 2920 4331 4518 15768 4518 18908 3258 1010 9779 24759 9043 19842 3630 28642 5587 2490 18059 8192 2241 3488 2279 6097 1010 6207 3843 3036 8081 6097 2015 16500 4007 10651 2664 9779 24759 1010 2047 9088 2015 11896 17894 12464 2278 7020 20554 2546 1055 2638 9779 24759 1048 3070 10736 1010 22286 20095 3006 2995 3643 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:24.327840 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:24.329153 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0327 22:33:24.330133 4711937472 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0327 22:33:24.337411 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:24.338414 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] aa ##pl long ##s yesterday green thanks ibm news changed sl protect profit , aa ##pl bull ##ish alert d ##ma d ##ma d ##ma d ##ma d ##ma price , apple buy tesla really ts ##la aa ##pl , aa ##pl je ##ez would today without announcement want op ##ex , apple ibm announce landmark deal develop apps aa ##pl tech stocks , fly hours move ##rs aa ##pl tech stocks , stocks watch n ##v ##gt f ##n ##ma fm ##cc new ##l mine aa ##pl goo ##g q ##q ##q , trade year aa ##pl already ibm ba ##c i ##w ##m t ##f _ f options ##tra ##te ##gies beth ##eh ##ouse picks http , apple ibm kill blackberry bb ##ry stock [SEP]


I0327 22:33:24.339321 4711937472 run_classifier.py:464] tokens: [CLS] aa ##pl long ##s yesterday green thanks ibm news changed sl protect profit , aa ##pl bull ##ish alert d ##ma d ##ma d ##ma d ##ma d ##ma price , apple buy tesla really ts ##la aa ##pl , aa ##pl je ##ez would today without announcement want op ##ex , apple ibm announce landmark deal develop apps aa ##pl tech stocks , fly hours move ##rs aa ##pl tech stocks , stocks watch n ##v ##gt f ##n ##ma fm ##cc new ##l mine aa ##pl goo ##g q ##q ##q , trade year aa ##pl already ibm ba ##c i ##w ##m t ##f _ f options ##tra ##te ##gies beth ##eh ##ouse picks http , apple ibm kill blackberry bb ##ry stock [SEP]


INFO:tensorflow:input_ids: 101 9779 24759 2146 2015 7483 2665 4283 9980 2739 2904 22889 4047 5618 1010 9779 24759 7087 4509 9499 1040 2863 1040 2863 1040 2863 1040 2863 1040 2863 3976 1010 6207 4965 26060 2428 24529 2721 9779 24759 1010 9779 24759 15333 9351 2052 2651 2302 8874 2215 6728 10288 1010 6207 9980 14970 8637 3066 4503 18726 9779 24759 6627 15768 1010 4875 2847 2693 2869 9779 24759 6627 15768 1010 15768 3422 1050 2615 13512 1042 2078 2863 4718 9468 2047 2140 3067 9779 24759 27571 2290 1053 4160 4160 1010 3119 2095 9779 24759 2525 9980 8670 2278 1045 2860 2213 1056 2546 1035 1042 7047 6494 2618 17252 7014 11106 15441 11214 8299 1010 6207 9980 3102 25935 22861 2854 4518 102


I0327 22:33:24.340430 4711937472 run_classifier.py:465] input_ids: 101 9779 24759 2146 2015 7483 2665 4283 9980 2739 2904 22889 4047 5618 1010 9779 24759 7087 4509 9499 1040 2863 1040 2863 1040 2863 1040 2863 1040 2863 3976 1010 6207 4965 26060 2428 24529 2721 9779 24759 1010 9779 24759 15333 9351 2052 2651 2302 8874 2215 6728 10288 1010 6207 9980 14970 8637 3066 4503 18726 9779 24759 6627 15768 1010 4875 2847 2693 2869 9779 24759 6627 15768 1010 15768 3422 1050 2615 13512 1042 2078 2863 4718 9468 2047 2140 3067 9779 24759 27571 2290 1053 4160 4160 1010 3119 2095 9779 24759 2525 9980 8670 2278 1045 2860 2213 1056 2546 1035 1042 7047 6494 2618 17252 7014 11106 15441 11214 8299 1010 6207 9980 3102 25935 22861 2854 4518 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:24.341858 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:24.344012 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0327 22:33:24.344806 4711937472 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0327 22:33:24.354522 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:24.355381 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] aa ##pl holding oct u g mac ##book pro quad core w yuan dev ##al ##uation components get cheaper china labor cheaper , p stocks performance hal cat aa ##pl ap ##c ox ##y d ##vn hp ##q cop sl ##b int ##c met cv ##x t ##w ##x dow x ##om , goo ##g aa ##pl t ##wt ##r monday market rec ##ap update option millionaire ##s stock options chat ##room today rally cam , aa ##pl ax ##p baba cv ##x f ##b mon r ##l x ##om see ya tomorrow bless day watching baba earning morning , aa ##pl trade alert , may paying much higher spreads us tech giants aa ##pl goo ##gl ms ##ft f ##b t ##wt ##r spread ##bet [SEP]


I0327 22:33:24.356569 4711937472 run_classifier.py:464] tokens: [CLS] aa ##pl holding oct u g mac ##book pro quad core w yuan dev ##al ##uation components get cheaper china labor cheaper , p stocks performance hal cat aa ##pl ap ##c ox ##y d ##vn hp ##q cop sl ##b int ##c met cv ##x t ##w ##x dow x ##om , goo ##g aa ##pl t ##wt ##r monday market rec ##ap update option millionaire ##s stock options chat ##room today rally cam , aa ##pl ax ##p baba cv ##x f ##b mon r ##l x ##om see ya tomorrow bless day watching baba earning morning , aa ##pl trade alert , may paying much higher spreads us tech giants aa ##pl goo ##gl ms ##ft f ##b t ##wt ##r spread ##bet [SEP]


INFO:tensorflow:input_ids: 101 9779 24759 3173 13323 1057 1043 6097 8654 4013 17718 4563 1059 11237 16475 2389 14505 6177 2131 16269 2859 4450 16269 1010 1052 15768 2836 11085 4937 9779 24759 9706 2278 23060 2100 1040 16022 6522 4160 8872 22889 2497 20014 2278 2777 26226 2595 1056 2860 2595 23268 1060 5358 1010 27571 2290 9779 24759 1056 26677 2099 6928 3006 28667 9331 10651 5724 19965 2015 4518 7047 11834 9954 2651 8320 11503 1010 9779 24759 22260 2361 14208 26226 2595 1042 2497 12256 1054 2140 1060 5358 2156 8038 4826 19994 2154 3666 14208 7414 2851 1010 9779 24759 3119 9499 1010 2089 7079 2172 3020 20861 2149 6627 7230 9779 24759 27571 23296 5796 6199 1042 2497 1056 26677 2099 3659 20915 102


I0327 22:33:24.358036 4711937472 run_classifier.py:465] input_ids: 101 9779 24759 3173 13323 1057 1043 6097 8654 4013 17718 4563 1059 11237 16475 2389 14505 6177 2131 16269 2859 4450 16269 1010 1052 15768 2836 11085 4937 9779 24759 9706 2278 23060 2100 1040 16022 6522 4160 8872 22889 2497 20014 2278 2777 26226 2595 1056 2860 2595 23268 1060 5358 1010 27571 2290 9779 24759 1056 26677 2099 6928 3006 28667 9331 10651 5724 19965 2015 4518 7047 11834 9954 2651 8320 11503 1010 9779 24759 22260 2361 14208 26226 2595 1042 2497 12256 1054 2140 1060 5358 2156 8038 4826 19994 2154 3666 14208 7414 2851 1010 9779 24759 3119 9499 1010 2089 7079 2172 3020 20861 2149 6627 7230 9779 24759 27571 23296 5796 6199 1042 2497 1056 26677 2099 3659 20915 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:24.359343 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:24.360352 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0327 22:33:24.361540 4711937472 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:Writing example 0 of 48


I0327 22:33:28.118936 4711937472 run_classifier.py:774] Writing example 0 of 48


INFO:tensorflow:*** Example ***


I0327 22:33:28.132787 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:28.133950 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] sen ##al twitter en aa ##pl , aa ##pl ts ##la freaking awesome day chat options money times nailed good roll raining ##mon ##ey , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , beat people trading aa ##pl ranking , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , aa ##pl keep eye closes could first successful close months would v bull ##ish [SEP]


I0327 22:33:28.135430 4711937472 run_classifier.py:464] tokens: [CLS] sen ##al twitter en aa ##pl , aa ##pl ts ##la freaking awesome day chat options money times nailed good roll raining ##mon ##ey , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , beat people trading aa ##pl ranking , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , brandon clay interviewed options ##tra ##te ##gies portfolio ##mana ##gement macro ##en ##vir ##on ##ment ibm aa ##pl http , aa ##pl keep eye closes could first successful close months would v bull ##ish [SEP]


INFO:tensorflow:input_ids: 101 12411 2389 10474 4372 9779 24759 1010 9779 24759 24529 2721 13847 12476 2154 11834 7047 2769 2335 26304 2204 4897 24057 8202 3240 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 3786 2111 6202 9779 24759 5464 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 9779 24759 2562 3239 14572 2071 2034 3144 2485 2706 2052 1058 7087 4509 102


I0327 22:33:28.136502 4711937472 run_classifier.py:465] input_ids: 101 12411 2389 10474 4372 9779 24759 1010 9779 24759 24529 2721 13847 12476 2154 11834 7047 2769 2335 26304 2204 4897 24057 8202 3240 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 3786 2111 6202 9779 24759 5464 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 8825 5726 10263 7047 6494 2618 17252 11103 24805 20511 26632 2368 21663 2239 3672 9980 9779 24759 8299 1010 9779 24759 2562 3239 14572 2071 2034 3144 2485 2706 2052 1058 7087 4509 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:28.138009 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:28.139785 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: up (id = 0)


I0327 22:33:28.140797 4711937472 run_classifier.py:468] label: up (id = 0)


INFO:tensorflow:*** Example ***


I0327 22:33:28.148363 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:28.149269 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] different news tab ##s open aa ##pl db bp give , breaking hut ##ham ol ##ayan elected ibm board directors aa ##pl ts ##la goo ##gl , breaking ex thomas cook boss green lands top ibm role aa ##pl ts ##la goo ##gl , top ##tick ##ert ##wee ##ts vr ##x spy aa ##pl t ##wt ##r w ##t ##w fin ##tech fins ##er ##v , q ##q ##q h ##ft al ##gos triggered buy sigma x cross ##finder at ##s l ##x p quan ##t ms ##ft f ##b gp ##ro am ##z ##n goo ##g aa ##pl ts ##la nfl ##x , aa ##pl apple inc day high aa ##pl jp ##m b ##lc ##m pv ##g aa ##pl stocks finance , pp ##g [SEP]


I0327 22:33:28.150125 4711937472 run_classifier.py:464] tokens: [CLS] different news tab ##s open aa ##pl db bp give , breaking hut ##ham ol ##ayan elected ibm board directors aa ##pl ts ##la goo ##gl , breaking ex thomas cook boss green lands top ibm role aa ##pl ts ##la goo ##gl , top ##tick ##ert ##wee ##ts vr ##x spy aa ##pl t ##wt ##r w ##t ##w fin ##tech fins ##er ##v , q ##q ##q h ##ft al ##gos triggered buy sigma x cross ##finder at ##s l ##x p quan ##t ms ##ft f ##b gp ##ro am ##z ##n goo ##g aa ##pl ts ##la nfl ##x , aa ##pl apple inc day high aa ##pl jp ##m b ##lc ##m pv ##g aa ##pl stocks finance , pp ##g [SEP]


INFO:tensorflow:input_ids: 101 2367 2739 21628 2015 2330 9779 24759 16962 17531 2507 1010 4911 12570 3511 19330 25868 2700 9980 2604 5501 9779 24759 24529 2721 27571 23296 1010 4911 4654 2726 5660 5795 2665 4915 2327 9980 2535 9779 24759 24529 2721 27571 23296 1010 2327 26348 8743 28394 3215 27830 2595 8645 9779 24759 1056 26677 2099 1059 2102 2860 10346 15007 18564 2121 2615 1010 1053 4160 4160 1044 6199 2632 12333 13330 4965 13201 1060 2892 23695 2012 2015 1048 2595 1052 24110 2102 5796 6199 1042 2497 14246 3217 2572 2480 2078 27571 2290 9779 24759 24529 2721 5088 2595 1010 9779 24759 6207 4297 2154 2152 9779 24759 16545 2213 1038 15472 2213 26189 2290 9779 24759 15768 5446 1010 4903 2290 102


I0327 22:33:28.151264 4711937472 run_classifier.py:465] input_ids: 101 2367 2739 21628 2015 2330 9779 24759 16962 17531 2507 1010 4911 12570 3511 19330 25868 2700 9980 2604 5501 9779 24759 24529 2721 27571 23296 1010 4911 4654 2726 5660 5795 2665 4915 2327 9980 2535 9779 24759 24529 2721 27571 23296 1010 2327 26348 8743 28394 3215 27830 2595 8645 9779 24759 1056 26677 2099 1059 2102 2860 10346 15007 18564 2121 2615 1010 1053 4160 4160 1044 6199 2632 12333 13330 4965 13201 1060 2892 23695 2012 2015 1048 2595 1052 24110 2102 5796 6199 1042 2497 14246 3217 2572 2480 2078 27571 2290 9779 24759 24529 2721 5088 2595 1010 9779 24759 6207 4297 2154 2152 9779 24759 16545 2213 1038 15472 2213 26189 2290 9779 24759 15768 5446 1010 4903 2290 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:28.153387 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:28.154762 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: stay (id = 2)


I0327 22:33:28.155626 4711937472 run_classifier.py:468] label: stay (id = 2)


INFO:tensorflow:*** Example ***


I0327 22:33:28.160637 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:28.161594 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] ahead us tech giants check analyst say goo ##g t ##wt ##r aa ##pl , today earnings could create market cap milestone ##s am ##z ##n b ms ##ft b goo ##gl b joining aa ##pl , aa ##pl sell luca mae ##st ##ri senior vice pre ##s cf ##o , aa ##pl , aa ##pl iphone sales could di ##sa ##point ##ing , em ##br rolling ##gg ##gg ##g get ##tt em ##mm cm ##go va ##pe am ##zz td ##ey pv ##sp sip ##c tb ##ev nr ##ti ac ##ol aa ##pl goo ##gl l ##ken spy g ##ds ##i ur ##bf , real audio video luxury luxury ##life ##sty ##le nyc shopping aa ##pl , cong ##rat ##s bulls months taking bottom paid [SEP]


I0327 22:33:28.162992 4711937472 run_classifier.py:464] tokens: [CLS] ahead us tech giants check analyst say goo ##g t ##wt ##r aa ##pl , today earnings could create market cap milestone ##s am ##z ##n b ms ##ft b goo ##gl b joining aa ##pl , aa ##pl sell luca mae ##st ##ri senior vice pre ##s cf ##o , aa ##pl , aa ##pl iphone sales could di ##sa ##point ##ing , em ##br rolling ##gg ##gg ##g get ##tt em ##mm cm ##go va ##pe am ##zz td ##ey pv ##sp sip ##c tb ##ev nr ##ti ac ##ol aa ##pl goo ##gl l ##ken spy g ##ds ##i ur ##bf , real audio video luxury luxury ##life ##sty ##le nyc shopping aa ##pl , cong ##rat ##s bulls months taking bottom paid [SEP]


INFO:tensorflow:input_ids: 101 3805 2149 6627 7230 4638 12941 2360 27571 2290 1056 26677 2099 9779 24759 1010 2651 16565 2071 3443 3006 6178 19199 2015 2572 2480 2078 1038 5796 6199 1038 27571 23296 1038 5241 9779 24759 1010 9779 24759 5271 15604 11530 3367 3089 3026 3580 3653 2015 12935 2080 1010 9779 24759 1010 9779 24759 18059 4341 2071 4487 3736 8400 2075 1010 7861 19892 5291 13871 13871 2290 2131 4779 7861 7382 4642 3995 12436 5051 2572 13213 14595 3240 26189 13102 10668 2278 26419 6777 17212 3775 9353 4747 9779 24759 27571 23296 1048 7520 8645 1043 5104 2072 24471 29292 1010 2613 5746 2678 9542 9542 15509 21756 2571 16392 6023 9779 24759 1010 26478 8609 2015 12065 2706 2635 3953 3825 102


I0327 22:33:28.164615 4711937472 run_classifier.py:465] input_ids: 101 3805 2149 6627 7230 4638 12941 2360 27571 2290 1056 26677 2099 9779 24759 1010 2651 16565 2071 3443 3006 6178 19199 2015 2572 2480 2078 1038 5796 6199 1038 27571 23296 1038 5241 9779 24759 1010 9779 24759 5271 15604 11530 3367 3089 3026 3580 3653 2015 12935 2080 1010 9779 24759 1010 9779 24759 18059 4341 2071 4487 3736 8400 2075 1010 7861 19892 5291 13871 13871 2290 2131 4779 7861 7382 4642 3995 12436 5051 2572 13213 14595 3240 26189 13102 10668 2278 26419 6777 17212 3775 9353 4747 9779 24759 27571 23296 1048 7520 8645 1043 5104 2072 24471 29292 1010 2613 5746 2678 9542 9542 15509 21756 2571 16392 6023 9779 24759 1010 26478 8609 2015 12065 2706 2635 3953 3825 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:28.166037 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:28.167016 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: up (id = 0)


I0327 22:33:28.168149 4711937472 run_classifier.py:468] label: up (id = 0)


INFO:tensorflow:*** Example ***


I0327 22:33:28.175327 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:28.176578 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] assets seeing jump t ##wee ##ts aa ##pl am ##z ##n dj ##ia eu ##rus ##d goo ##gl , aa ##pl report apple music near release android app , wi ##x smart new source revenue stocks trading investing f ##b aa ##pl , apple new million annual revenue stream growing aa ##pl , watch week ahead monday oct aa ##pl , simple apple strategy right even simpler aa ##pl strategy buy hold aaa ##aa ##aa ##and scene , dust dire ##xi ##on daily gold miners bear x shares w ##k high dust aa ##pl fx ##e jo dust nas ##da ##q nas ##da ##q , beat people trading aa ##pl ranking holy shit trading game , china drops rates amazon drops profit markets go wild aa [SEP]


I0327 22:33:28.178428 4711937472 run_classifier.py:464] tokens: [CLS] assets seeing jump t ##wee ##ts aa ##pl am ##z ##n dj ##ia eu ##rus ##d goo ##gl , aa ##pl report apple music near release android app , wi ##x smart new source revenue stocks trading investing f ##b aa ##pl , apple new million annual revenue stream growing aa ##pl , watch week ahead monday oct aa ##pl , simple apple strategy right even simpler aa ##pl strategy buy hold aaa ##aa ##aa ##and scene , dust dire ##xi ##on daily gold miners bear x shares w ##k high dust aa ##pl fx ##e jo dust nas ##da ##q nas ##da ##q , beat people trading aa ##pl ranking holy shit trading game , china drops rates amazon drops profit markets go wild aa [SEP]


INFO:tensorflow:input_ids: 101 7045 3773 5376 1056 28394 3215 9779 24759 2572 2480 2078 6520 2401 7327 7946 2094 27571 23296 1010 9779 24759 3189 6207 2189 2379 2713 11924 10439 1010 15536 2595 6047 2047 3120 6599 15768 6202 19920 1042 2497 9779 24759 1010 6207 2047 2454 3296 6599 5460 3652 9779 24759 1010 3422 2733 3805 6928 13323 9779 24759 1010 3722 6207 5656 2157 2130 16325 9779 24759 5656 4965 2907 13360 11057 11057 5685 3496 1010 6497 18704 9048 2239 3679 2751 11257 4562 1060 6661 1059 2243 2152 6497 9779 24759 23292 2063 8183 6497 17235 2850 4160 17235 2850 4160 1010 3786 2111 6202 9779 24759 5464 4151 4485 6202 2208 1010 2859 9010 6165 9733 9010 5618 6089 2175 3748 9779 102


I0327 22:33:28.179703 4711937472 run_classifier.py:465] input_ids: 101 7045 3773 5376 1056 28394 3215 9779 24759 2572 2480 2078 6520 2401 7327 7946 2094 27571 23296 1010 9779 24759 3189 6207 2189 2379 2713 11924 10439 1010 15536 2595 6047 2047 3120 6599 15768 6202 19920 1042 2497 9779 24759 1010 6207 2047 2454 3296 6599 5460 3652 9779 24759 1010 3422 2733 3805 6928 13323 9779 24759 1010 3722 6207 5656 2157 2130 16325 9779 24759 5656 4965 2907 13360 11057 11057 5685 3496 1010 6497 18704 9048 2239 3679 2751 11257 4562 1060 6661 1059 2243 2152 6497 9779 24759 23292 2063 8183 6497 17235 2850 4160 17235 2850 4160 1010 3786 2111 6202 9779 24759 5464 4151 4485 6202 2208 1010 2859 9010 6165 9733 9010 5618 6089 2175 3748 9779 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:28.181546 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:28.182678 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: up (id = 0)


I0327 22:33:28.183552 4711937472 run_classifier.py:468] label: up (id = 0)


INFO:tensorflow:*** Example ***


I0327 22:33:28.192659 4711937472 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0327 22:33:28.194155 4711937472 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] aa ##pl apple inc filing ##s aa ##pl i ##w ##m dia pg aa ##pl invest finance , apple refusing unlock iphone us law enforcement aa ##pl , am ##d looks promising aa ##pl ms ##ft , top ##tick ##ert ##wee ##ts vr ##x aa ##pl spy b ##ds ##i t ##wt ##r fin ##tech fins ##er ##v , gil ##d q aa ##pl q earnings market close tomorrow two heavy weights respective industries , the ##st ##ree ##t apple tv future tv tim cook rocker aa ##pl , video traders always use stops dj ##ia spy es _ f n ##q _ f q ##q ##q v ##xx u ##wt ##i aa ##pl , sky ##car _ jack sal ##uting great nor ##cal tech co [SEP]


I0327 22:33:28.195372 4711937472 run_classifier.py:464] tokens: [CLS] aa ##pl apple inc filing ##s aa ##pl i ##w ##m dia pg aa ##pl invest finance , apple refusing unlock iphone us law enforcement aa ##pl , am ##d looks promising aa ##pl ms ##ft , top ##tick ##ert ##wee ##ts vr ##x aa ##pl spy b ##ds ##i t ##wt ##r fin ##tech fins ##er ##v , gil ##d q aa ##pl q earnings market close tomorrow two heavy weights respective industries , the ##st ##ree ##t apple tv future tv tim cook rocker aa ##pl , video traders always use stops dj ##ia spy es _ f n ##q _ f q ##q ##q v ##xx u ##wt ##i aa ##pl , sky ##car _ jack sal ##uting great nor ##cal tech co [SEP]


INFO:tensorflow:input_ids: 101 9779 24759 6207 4297 15242 2015 9779 24759 1045 2860 2213 22939 18720 9779 24759 15697 5446 1010 6207 11193 19829 18059 2149 2375 7285 9779 24759 1010 2572 2094 3504 10015 9779 24759 5796 6199 1010 2327 26348 8743 28394 3215 27830 2595 9779 24759 8645 1038 5104 2072 1056 26677 2099 10346 15007 18564 2121 2615 1010 13097 2094 1053 9779 24759 1053 16565 3006 2485 4826 2048 3082 15871 7972 6088 1010 1996 3367 9910 2102 6207 2694 2925 2694 5199 5660 24779 9779 24759 1010 2678 13066 2467 2224 6762 6520 2401 8645 9686 1035 1042 1050 4160 1035 1042 1053 4160 4160 1058 20348 1057 26677 2072 9779 24759 1010 3712 10010 1035 2990 16183 20807 2307 4496 9289 6627 2522 102


I0327 22:33:28.196349 4711937472 run_classifier.py:465] input_ids: 101 9779 24759 6207 4297 15242 2015 9779 24759 1045 2860 2213 22939 18720 9779 24759 15697 5446 1010 6207 11193 19829 18059 2149 2375 7285 9779 24759 1010 2572 2094 3504 10015 9779 24759 5796 6199 1010 2327 26348 8743 28394 3215 27830 2595 9779 24759 8645 1038 5104 2072 1056 26677 2099 10346 15007 18564 2121 2615 1010 13097 2094 1053 9779 24759 1053 16565 3006 2485 4826 2048 3082 15871 7972 6088 1010 1996 3367 9910 2102 6207 2694 2925 2694 5199 5660 24779 9779 24759 1010 2678 13066 2467 2224 6762 6520 2401 8645 9686 1035 1042 1050 4160 1035 1042 1053 4160 4160 1058 20348 1057 26677 2072 9779 24759 1010 3712 10010 1035 2990 16183 20807 2307 4496 9289 6627 2522 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0327 22:33:28.197562 4711937472 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0327 22:33:28.198927 4711937472 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0327 22:33:28.200636 4711937472 run_classifier.py:468] label: down (id = 1)


In [16]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
  # """Creates a classification model."""
    bert_module = hub.Module(
      BERT_MODEL_HUB,
      trainable=True)
    bert_inputs = dict(
      input_ids=input_ids,
      input_mask=input_mask,
      segment_ids=segment_ids)
    bert_outputs = bert_module(
      inputs=bert_inputs,
      signature="tokens",
      as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.get_variable(
      "output_weights", [num_labels, hidden_size],
      initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
      "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):

    # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabiltiies.
        if is_predicting:
            return (predicted_labels, log_probs)

    # If we're train/eval, compute loss between predicted and actual label
    per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
    loss = tf.reduce_mean(per_example_loss)
    return (loss, predicted_labels, log_probs)

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    # """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
      #   """The `model_fn` for TPUEstimator."""
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:
            (loss, predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            train_op = bert.optimization.create_optimizer(
              loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

          # Calculate evaluation metrics. 
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                return {"eval_accuracy": accuracy}

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode,
                loss=loss,
                train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            predictions = {'probabilities': log_probs, 'labels': predicted_labels}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        # Return the actual model function in the closure
    return model_fn

In [18]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 16
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig()

model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)



W0327 22:45:16.536961 4711937472 estimator.py:1760] Using temporary folder as model directory: /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpx10w49rc


INFO:tensorflow:Using config: {'_model_dir': '/var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpx10w49rc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1413f4da0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0327 22:45:16.538812 4711937472 estimator.py:201] Using config: {'_model_dir': '/var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpx10w49rc', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x1413f4da0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Beginning Training!
INFO:tensorflow:Calling model_fn.


I0327 22:45:16.843706 4711937472 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0327 22:45:19.439677 4711937472 saver.py:1483] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0327 22:45:25.462770 4711937472 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0327 22:45:25.465264 4711937472 basic_session_run_hooks.py:527] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0327 22:45:28.734106 4711937472 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0327 22:45:32.609914 4711937472 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0327 22:45:32.825582 4711937472 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpx10w49rc/model.ckpt.


I0327 22:45:39.862158 4711937472 basic_session_run_hooks.py:594] Saving checkpoints for 0 into /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpx10w49rc/model.ckpt.


INFO:tensorflow:loss = 1.6797863, step = 1


I0327 22:45:56.952554 4711937472 basic_session_run_hooks.py:249] loss = 1.6797863, step = 1


KeyboardInterrupt: 

## Evaluate model on testing data

In [12]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


I0320 21:48:23.537125 4476413376 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0320 21:48:26.342192 4476413376 saver.py:1483] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


I0320 21:48:32.306682 4476413376 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-03-21T01:48:32Z


I0320 21:48:32.324270 4476413376 evaluation.py:257] Starting evaluation at 2019-03-21T01:48:32Z


INFO:tensorflow:Graph was finalized.


I0320 21:48:33.355464 4476413376 monitored_session.py:222] Graph was finalized.


Instructions for updating:
Use standard file APIs to check for files with this prefix.


W0320 21:48:33.357040 4476413376 deprecation.py:323] From /Users/rohan/anaconda3/envs/GradSchool/lib/python3.7/site-packages/tensorflow/python/training/saver.py:1266: checkpoint_exists (from tensorflow.python.training.checkpoint_management) is deprecated and will be removed in a future version.
Instructions for updating:
Use standard file APIs to check for files with this prefix.


INFO:tensorflow:Restoring parameters from /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpuxmqexus/model.ckpt-0


I0320 21:48:33.358529 4476413376 saver.py:1270] Restoring parameters from /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpuxmqexus/model.ckpt-0


INFO:tensorflow:Running local_init_op.


I0320 21:48:35.461864 4476413376 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0320 21:48:35.671769 4476413376 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-03-21-01:51:34


I0320 21:51:34.732543 4476413376 evaluation.py:277] Finished evaluation at 2019-03-21-01:51:34


INFO:tensorflow:Saving dict for global step 0: eval_accuracy = 0.15580986, global_step = 0, loss = 1.3047361


I0320 21:51:34.733775 4476413376 estimator.py:1979] Saving dict for global step 0: eval_accuracy = 0.15580986, global_step = 0, loss = 1.3047361


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 0: /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpuxmqexus/model.ckpt-0


I0320 21:51:36.768786 4476413376 estimator.py:2039] Saving 'checkpoint_path' summary for global step 0: /var/folders/y7/ggkv4d0s66sb_d533y_dgl6m0000gn/T/tmpuxmqexus/model.ckpt-0


{'eval_accuracy': 0.15580986, 'loss': 1.3047361, 'global_step': 0}