## Data Preprocessing

In [None]:
import pandas as pd
import re

df = pd.read_csv('/content/gdrive/My Drive/Cognitive Computing/texts_and_fin.csv')

In [None]:
# Define a function to count words in the docs
def word_count(s):
    a = s.split(' ')
    return len(a)

df['txt_len'] = df['text'].map(word_count)
df['txt_len'].hist()
len(df)

# Get rid of documents with more than 20,000 words
df2 = df[df['txt_len'] < 20000]

# Eliminate text outside of the phrases "Check the appropriate box" and "Pursuant to the requirements"
# 初次文本过滤，用正则匹配取出字符串“Check the appropriate box”与“Pursuant to the requirements”中的文本内容
new_text = []
for i in range(len(df2)):
    try:
        a = re.search(r'Check\sthe\sappropriate\sbox(.*?)Pursuant\sto\sthe\srequirements', df2['text'].iloc[i]).group(1)
        new_text.append(a)
    except AttributeError:
        new_text.append(None)
        
df2['filtered_text'] = new_text
df2 = df2.dropna()

# Add an end tag to the end of filtered text so that we will have a consistent string in each row
def add_end_tag(s):
    a = s + '<end_tag>'
    return a

df2['filtered_text2'] = df2['filtered_text'].map(add_end_tag)

# Elinimate the text before the word "On"
# 二次文本过滤，公司披露的实际信息以“On”开头，因此取出每行中以“On”开头之后的文本内容
new_text = []
for i in range(len(df2)):
    try:
        a = re.search(r'On\s(.*?)\<end\_tag\>', df2['filtered_text2'].iloc[i]).group(1)
        new_text.append(a)
    except AttributeError:
        new_text.append(None)

df2['filtered_text3'] = new_text
df2 = df2.dropna()

df3 = df2[['ticker', 'signal', 'release_date', 'filtered_text3']]
df3.to_csv('texts_and_fin2.csv')

## BERT Model (a majority of this code is copied from the BERT Tutorial)

Bert官方Github：https://github.com/google-research/bert

官方的中文预训练权重：chinese_L-12_H-768_A-12.zip https://storage.googleapis.com/bert_models/2018_11_03/chinese_L-12_H-768_A-12.zip

例子所在Github：https://github.com/bojone/bert_in_keras/

In [47]:
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime
from tensorflow import keras
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization
import os
import re
import pandas as pd
import numpy as np

df = pd.read_csv('texts_and_fin2.csv')

In [49]:
df.head()

Unnamed: 0.1,Unnamed: 0,ticker,signal,release_date,filtered_text3
0,0,MMM,up,2017-10-24 07:39:34,"October 24, 2017, 3M Company issued a press re..."
1,1,MMM,stay,2017-10-06 09:47:32,"October 5, 2017, 3M Company (the “Company”) is..."
2,2,MMM,stay,2017-10-02 11:23:35,"September 21, 2017, the Company entered into a..."
3,3,MMM,stay,2017-09-21 08:46:36,"September 21, 2017, 3M Company (the “Company”)..."
4,4,MMM,stay,2017-08-14 16:35:06,"August 13, 2017, the Board of Directors of 3M ..."


In [50]:
# Run this cell for a function for oversampling

def oversample(X,y):
    # Get number of rows with imbalanced class
    target = y.sum().idxmax()
    n = y[target].sum()
    # identify imbalanced targets
    imbalanced = y.drop(target,axis=1)
    #For each target, create a dataframe of randomly sampled rows, append to list
    append_list =  [y.loc[y[col]==1].sample(n=n-y[col].sum(),replace=True,random_state=20) for col in imbalanced.columns]
    append_list.append(y)
    y = pd.concat(append_list,axis=0)
    # match y indexes on other inputs
    X = X.loc[y.index]
    assert (y.index.all() == X.index.all())
    return X, y

df = df.rename(columns = {"filtered_text3": "filtered_text"})
df = df.sort_values(by='release_date', ascending=True, axis=0)
testNum = int(len(df) * -.1)
X_train = df['filtered_text'][:testNum].dropna()
y_train = pd.get_dummies(columns=['signal'],data=df['signal'])[:testNum].dropna().iloc[:, :]
test = df.loc[list(set(list(df.index)) - set(list(X_train.index)))]
X_test = test['filtered_text'].dropna()
y_test = test['signal'].dropna()

X_train, y_train = oversample(X_train, y_train)

# Recreate the signal variable
y_train["signal"] = np.nan

for i, y in y_train.iterrows():
    if str(type(y_train.loc[i])) == "<class 'pandas.core.frame.DataFrame'>": # If an index only has one observation, it draws up an error if we try to use the indexer agaon
        # They're usually classed as a series while the ones with many observations are considered a df. This is a way to get
        # rid of them
        if y_train.loc[i].iloc[0, 0] == 1:
            y_train.loc[i, "signal"] = "down"
        elif y_train.loc[i].iloc[0, 1] == 1:
            y_train.loc[i, "signal"] = "stay"
        else:
            y_train.loc[i, "signal"] = "up"
    else: # If they only have one observation, we settle it here instead
        if y_train.loc[i][0] == 1:
              y_train.loc[i, "signal"] = "down"
        elif y_train.loc[i][1] == 1:
            y_train.loc[i, "signal"] = "stay"
        else:
            y_train.loc[i, "signal"] = "up"
            
X_train2 = X_train.reset_index(drop = True)
y_train2 = y_train['signal'].reset_index(drop = True)

data = pd.concat([X_train2, y_train2], axis = 1)
data.rename(columns = {"filtered_text":"doc"}, inplace = True)

X_test = X_test.reset_index(drop = True)
y_test = y_test.reset_index(drop = True)

train = data
test = pd.concat([X_test, y_test], axis = 1) 
test.rename(columns = {"filtered_text":"doc"}, inplace = True)

In [54]:
train.head()

Unnamed: 0,doc,signal
0,"March 21, 2017, General Mills, Inc. issued a p...",down
1,"October 29, 2015, Stephen M. Dow, a member of ...",down
2,"May 7, 2015, Apache Corporation issued a press...",down
3,"January 18, 2017, the Board of Directors (the ...",down
4,"July 25, 2016, Cadence Design Systems, Inc. (""...",down


# BERT Model

In [30]:
# input data columns
DATA_COLUMN = 'doc'      # input data is the "doc" column
LABEL_COLUMN = 'signal'  # output data is the "signal" column
label_list = ['up', 'down', 'stay']   # label_list is the list of labels("up" ,"down" and "stay" for Stock prices were up, down and remain unchanged 

# Use bert library "run_classifier" model to transform our data into a format BERT understands
# 说明：将输入的数据转为bert模型输入的数据格式，主要有下面两个步骤：

"""
1、创建“InputExamples”，即使用“run_classifier”进行处理
    
    * text_a 是我们想要分类的数据，在本例中，即为data['doc']对应的列数据
    * text_b 是我们想要让模型理解句子的数据，可将其看成是text_a的label。
    
      举个栗子，机器翻译中，输入“我是一只喵”对应“I am a cat”，那么text_a 对应“我是一只喵”，text_b 对应“I am a cat”。
      再举个栗子，问答场景中，text_a 对应问题，text_b 对应答案。
      本例是个分类问题，我们只关心text_a和对应label
      
    * label这里对应“up、down、stay”，如果是二分类问题，则对应 “True”或者“False”等。


"""
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

"""
2、对我们输入的数据进行预处理，使其与BERT模型训练的数据相匹配。

   提示：不要紧张，这部分数据预处理的代码直接copy运行就好，下面我们对其主要实现的功能进行说明：

    加载google为我们训练好的bert模型参数，即bert-weight
    文本统一转为小写（bert在训练时使用小写字母）
    分词（(i.e. "I am cat" -> ["I", "am", "cat"])
    把单词分成单词块(i.e. "calling" -> ["call", "##ing"])
    使用BERT提供的vocab.txt文件将单词映射到索引
    为每一段文本添加标记，句子开始标记“CLS”，句子与句子之间的分割标记“SEP”
    为每个输入添加“index”和“segment”标记
"""

# 在线加载bert-weight
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"


# 本地加载bert-weight
BERT_MODEL_HUB = hub.load_module_spec("/root/None/Bert/Bert_Weight/bert_uncased_L-12_H-768_A-12")

# 下载bert-weight
"""
Windows直接访问链接即可：https://storage.googleapis.com/tfhub-modules/google/bert_uncased_L-12_H-768_A-12/1.tar.gz

Linux下载：wget https://storage.googleapis.com/tfhub-modules/google/bert_uncased_L-12_H-768_A-12/1.tar.gz

解压到指定目录：tar -zxvf 1.tar.gz -C  /root/None/Bert/Bert_Weight/bert_uncased_L-12_H-768_A-12  
"""

# 我们刚刚了解到我们使用的BERT模型需要小写的数据，存储在tokenization_info["do_lower_case"]中
def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(signature="tokenization_info", as_dict=True)

        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],tokenization_info["do_lower_case"]])
      
    return bert.tokenization.FullTokenizer(vocab_file=vocab_file, do_lower_case=do_lower_case)

tokenizer = create_tokenizer_from_hub_module()


# 使用tokenizer分词，即run_classifier.convert_examples_to_features 将上述我们定义的"InputExample"转换为Bert能够理解的特性

# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 128
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)

Instructions for updating:
Colocations handled automatically by placer.


W0924 21:43:54.533146 140534296721152 deprecation.py:323] From /root/anaconda3/lib/python3.7/site-packages/tensorflow/python/ops/control_flow_ops.py:3632: colocate_with (from tensorflow.python.framework.ops) is deprecated and will be removed in a future version.
Instructions for updating:
Colocations handled automatically by placer.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0924 21:43:56.715095 140534296721152 saver.py:1483] Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Writing example 0 of 19458


I0924 21:43:57.191264 140534296721152 run_classifier.py:774] Writing example 0 of 19458


INFO:tensorflow:*** Example ***


I0924 21:43:57.195146 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:43:57.196774 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] march 21 , 2017 , general mills , inc . issued a press release reporting financial results for its fiscal quarter ended february 26 , 2017 . a copy of the press release is attached here ##to as exhibit 99 and incorporated here ##in by reference . the information furnished in item 2 . 02 of this current report on form 8 - k and exhibit 99 attached here ##to shall not be deemed to be filed for the purposes of section 18 of the securities exchange act of 1934 , as amended , or otherwise subject to lia ##bilities of that section or sections 11 and 12 ( a ) ( 2 ) of the securities act of 1933 , as amended , and shall [SEP]


I0924 21:43:57.198529 140534296721152 run_classifier.py:464] tokens: [CLS] march 21 , 2017 , general mills , inc . issued a press release reporting financial results for its fiscal quarter ended february 26 , 2017 . a copy of the press release is attached here ##to as exhibit 99 and incorporated here ##in by reference . the information furnished in item 2 . 02 of this current report on form 8 - k and exhibit 99 attached here ##to shall not be deemed to be filed for the purposes of section 18 of the securities exchange act of 1934 , as amended , or otherwise subject to lia ##bilities of that section or sections 11 and 12 ( a ) ( 2 ) of the securities act of 1933 , as amended , and shall [SEP]


INFO:tensorflow:input_ids: 101 2233 2538 1010 2418 1010 2236 6341 1010 4297 1012 3843 1037 2811 2713 7316 3361 3463 2005 2049 10807 4284 3092 2337 2656 1010 2418 1012 1037 6100 1997 1996 2811 2713 2003 4987 2182 3406 2004 8327 5585 1998 5100 2182 2378 2011 4431 1012 1996 2592 19851 1999 8875 1016 1012 6185 1997 2023 2783 3189 2006 2433 1022 1011 1047 1998 8327 5585 4987 2182 3406 4618 2025 2022 8357 2000 2022 6406 2005 1996 5682 1997 2930 2324 1997 1996 12012 3863 2552 1997 4579 1010 2004 13266 1010 2030 4728 3395 2000 22393 14680 1997 2008 2930 2030 5433 2340 1998 2260 1006 1037 1007 1006 1016 1007 1997 1996 12012 2552 1997 4537 1010 2004 13266 1010 1998 4618 102


I0924 21:43:57.200056 140534296721152 run_classifier.py:465] input_ids: 101 2233 2538 1010 2418 1010 2236 6341 1010 4297 1012 3843 1037 2811 2713 7316 3361 3463 2005 2049 10807 4284 3092 2337 2656 1010 2418 1012 1037 6100 1997 1996 2811 2713 2003 4987 2182 3406 2004 8327 5585 1998 5100 2182 2378 2011 4431 1012 1996 2592 19851 1999 8875 1016 1012 6185 1997 2023 2783 3189 2006 2433 1022 1011 1047 1998 8327 5585 4987 2182 3406 4618 2025 2022 8357 2000 2022 6406 2005 1996 5682 1997 2930 2324 1997 1996 12012 3863 2552 1997 4579 1010 2004 13266 1010 2030 4728 3395 2000 22393 14680 1997 2008 2930 2030 5433 2340 1998 2260 1006 1037 1007 1006 1016 1007 1997 1996 12012 2552 1997 4537 1010 2004 13266 1010 1998 4618 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0924 21:43:57.201696 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.203313 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0924 21:43:57.205016 140534296721152 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0924 21:43:57.207891 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:43:57.209512 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] october 29 , 2015 , stephen m . dow , a member of the board of directors of ci ##trix systems , inc . ( the “ company ” ) since july 1989 , informed the company that he will be retiring from the board of directors of the company , effective as of the close of business on october 29 , 2015 . mr . dow advised the company that his decision to retire did not involve any disagreement with the company . signatures [SEP]


I0924 21:43:57.211165 140534296721152 run_classifier.py:464] tokens: [CLS] october 29 , 2015 , stephen m . dow , a member of the board of directors of ci ##trix systems , inc . ( the “ company ” ) since july 1989 , informed the company that he will be retiring from the board of directors of the company , effective as of the close of business on october 29 , 2015 . mr . dow advised the company that his decision to retire did not involve any disagreement with the company . signatures [SEP]


INFO:tensorflow:input_ids: 101 2255 2756 1010 2325 1010 4459 1049 1012 23268 1010 1037 2266 1997 1996 2604 1997 5501 1997 25022 29184 3001 1010 4297 1012 1006 1996 1523 2194 1524 1007 2144 2251 2960 1010 6727 1996 2194 2008 2002 2097 2022 9150 2013 1996 2604 1997 5501 1997 1996 2194 1010 4621 2004 1997 1996 2485 1997 2449 2006 2255 2756 1010 2325 1012 2720 1012 23268 9449 1996 2194 2008 2010 3247 2000 11036 2106 2025 9125 2151 18185 2007 1996 2194 1012 16442 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.212798 140534296721152 run_classifier.py:465] input_ids: 101 2255 2756 1010 2325 1010 4459 1049 1012 23268 1010 1037 2266 1997 1996 2604 1997 5501 1997 25022 29184 3001 1010 4297 1012 1006 1996 1523 2194 1524 1007 2144 2251 2960 1010 6727 1996 2194 2008 2002 2097 2022 9150 2013 1996 2604 1997 5501 1997 1996 2194 1010 4621 2004 1997 1996 2485 1997 2449 2006 2255 2756 1010 2325 1012 2720 1012 23268 9449 1996 2194 2008 2010 3247 2000 11036 2106 2025 9125 2151 18185 2007 1996 2194 1012 16442 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.214420 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.216067 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0924 21:43:57.217643 140534296721152 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0924 21:43:57.220120 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:43:57.221764 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] may 7 , 2015 , apache corporation issued a press release announcing financial and operating results for the fiscal quarter ended march 31 , 2015 . the full text of the press release is furnished here ##with as exhibit 99 . 1 and incorporated here ##in by reference . signatures [SEP]


I0924 21:43:57.223392 140534296721152 run_classifier.py:464] tokens: [CLS] may 7 , 2015 , apache corporation issued a press release announcing financial and operating results for the fiscal quarter ended march 31 , 2015 . the full text of the press release is furnished here ##with as exhibit 99 . 1 and incorporated here ##in by reference . signatures [SEP]


INFO:tensorflow:input_ids: 101 2089 1021 1010 2325 1010 15895 3840 3843 1037 2811 2713 13856 3361 1998 4082 3463 2005 1996 10807 4284 3092 2233 2861 1010 2325 1012 1996 2440 3793 1997 1996 2811 2713 2003 19851 2182 24415 2004 8327 5585 1012 1015 1998 5100 2182 2378 2011 4431 1012 16442 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.224966 140534296721152 run_classifier.py:465] input_ids: 101 2089 1021 1010 2325 1010 15895 3840 3843 1037 2811 2713 13856 3361 1998 4082 3463 2005 1996 10807 4284 3092 2233 2861 1010 2325 1012 1996 2440 3793 1997 1996 2811 2713 2003 19851 2182 24415 2004 8327 5585 1012 1015 1998 5100 2182 2378 2011 4431 1012 16442 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.226694 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.228289 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0924 21:43:57.229836 140534296721152 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0924 21:43:57.235572 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:43:57.237170 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] january 18 , 2017 , the board of directors ( the “ board ” ) of the williams companies , inc . ( the “ company ” ) approved amendments to the company ’ s by - laws to adopt proxy access . article iii , section 3 of the by - laws , the proxy access by - law provision , permits a stock ##holder , or group of up to 20 stock ##holders , owning at least 3 % of the company ’ s outstanding common stock continuously for at least three years to nominate and include in the company ’ s annual meeting proxy materials director nominees con ##stituting up to the greater of two nominees or 20 % of the board , [SEP]


I0924 21:43:57.238935 140534296721152 run_classifier.py:464] tokens: [CLS] january 18 , 2017 , the board of directors ( the “ board ” ) of the williams companies , inc . ( the “ company ” ) approved amendments to the company ’ s by - laws to adopt proxy access . article iii , section 3 of the by - laws , the proxy access by - law provision , permits a stock ##holder , or group of up to 20 stock ##holders , owning at least 3 % of the company ’ s outstanding common stock continuously for at least three years to nominate and include in the company ’ s annual meeting proxy materials director nominees con ##stituting up to the greater of two nominees or 20 % of the board , [SEP]


INFO:tensorflow:input_ids: 101 2254 2324 1010 2418 1010 1996 2604 1997 5501 1006 1996 1523 2604 1524 1007 1997 1996 3766 3316 1010 4297 1012 1006 1996 1523 2194 1524 1007 4844 16051 2000 1996 2194 1521 1055 2011 1011 4277 2000 11092 24540 3229 1012 3720 3523 1010 2930 1017 1997 1996 2011 1011 4277 1010 1996 24540 3229 2011 1011 2375 9347 1010 14245 1037 4518 14528 1010 2030 2177 1997 2039 2000 2322 4518 17794 1010 19273 2012 2560 1017 1003 1997 1996 2194 1521 1055 5151 2691 4518 10843 2005 2012 2560 2093 2086 2000 23388 1998 2421 1999 1996 2194 1521 1055 3296 3116 24540 4475 2472 17853 9530 21532 2039 2000 1996 3618 1997 2048 17853 2030 2322 1003 1997 1996 2604 1010 102


I0924 21:43:57.241645 140534296721152 run_classifier.py:465] input_ids: 101 2254 2324 1010 2418 1010 1996 2604 1997 5501 1006 1996 1523 2604 1524 1007 1997 1996 3766 3316 1010 4297 1012 1006 1996 1523 2194 1524 1007 4844 16051 2000 1996 2194 1521 1055 2011 1011 4277 2000 11092 24540 3229 1012 3720 3523 1010 2930 1017 1997 1996 2011 1011 4277 1010 1996 24540 3229 2011 1011 2375 9347 1010 14245 1037 4518 14528 1010 2030 2177 1997 2039 2000 2322 4518 17794 1010 19273 2012 2560 1017 1003 1997 1996 2194 1521 1055 5151 2691 4518 10843 2005 2012 2560 2093 2086 2000 23388 1998 2421 1999 1996 2194 1521 1055 3296 3116 24540 4475 2472 17853 9530 21532 2039 2000 1996 3618 1997 2048 17853 2030 2322 1003 1997 1996 2604 1010 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0924 21:43:57.243330 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.244967 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0924 21:43:57.246617 140534296721152 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0924 21:43:57.249848 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:43:57.251522 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] july 25 , 2016 , cadence design systems , inc . ( " cadence " ) issued a press release announcing its financial results for the quarter ended july 2 , 2016 . a copy of the press release is attached here ##to as exhibit 99 . 01 and a copy of the commentary by the chief financial officer of cadence regarding cadence ' s financial results for the quarter ended july 2 , 2016 is attached here ##to as exhibit 99 . 02 , and the press release and the commentary are incorporated here ##in by reference . item 9 . 01 . financial statements and exhibits . signatures [SEP]


I0924 21:43:57.253114 140534296721152 run_classifier.py:464] tokens: [CLS] july 25 , 2016 , cadence design systems , inc . ( " cadence " ) issued a press release announcing its financial results for the quarter ended july 2 , 2016 . a copy of the press release is attached here ##to as exhibit 99 . 01 and a copy of the commentary by the chief financial officer of cadence regarding cadence ' s financial results for the quarter ended july 2 , 2016 is attached here ##to as exhibit 99 . 02 , and the press release and the commentary are incorporated here ##in by reference . item 9 . 01 . financial statements and exhibits . signatures [SEP]


INFO:tensorflow:input_ids: 101 2251 2423 1010 2355 1010 23620 2640 3001 1010 4297 1012 1006 1000 23620 1000 1007 3843 1037 2811 2713 13856 2049 3361 3463 2005 1996 4284 3092 2251 1016 1010 2355 1012 1037 6100 1997 1996 2811 2713 2003 4987 2182 3406 2004 8327 5585 1012 5890 1998 1037 6100 1997 1996 8570 2011 1996 2708 3361 2961 1997 23620 4953 23620 1005 1055 3361 3463 2005 1996 4284 3092 2251 1016 1010 2355 2003 4987 2182 3406 2004 8327 5585 1012 6185 1010 1998 1996 2811 2713 1998 1996 8570 2024 5100 2182 2378 2011 4431 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1012 16442 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.254773 140534296721152 run_classifier.py:465] input_ids: 101 2251 2423 1010 2355 1010 23620 2640 3001 1010 4297 1012 1006 1000 23620 1000 1007 3843 1037 2811 2713 13856 2049 3361 3463 2005 1996 4284 3092 2251 1016 1010 2355 1012 1037 6100 1997 1996 2811 2713 2003 4987 2182 3406 2004 8327 5585 1012 5890 1998 1037 6100 1997 1996 8570 2011 1996 2708 3361 2961 1997 23620 4953 23620 1005 1055 3361 3463 2005 1996 4284 3092 2251 1016 1010 2355 2003 4987 2182 3406 2004 8327 5585 1012 6185 1010 1998 1996 2811 2713 1998 1996 8570 2024 5100 2182 2378 2011 4431 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1012 16442 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.256382 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:43:57.257996 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0924 21:43:57.259644 140534296721152 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:Writing example 10000 of 19458


I0924 21:44:46.490596 140534296721152 run_classifier.py:774] Writing example 10000 of 19458


INFO:tensorflow:Writing example 0 of 1136


I0924 21:45:40.131425 140534296721152 run_classifier.py:774] Writing example 0 of 1136


INFO:tensorflow:*** Example ***


I0924 21:45:40.134605 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:45:40.136245 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] february 14 , 2018 , fl ##ir systems , inc . ( the “ company ” ) issued a news release announcing ( i ) its financial results for the quarter and year ended december 31 , 2017 , ( ii ) outlook for revenue and earnings per share for the year ending december 31 , 2018 , and ( iii ) the declaration of a quarterly divide ##nd . the news release issued february 14 , 2018 is furnished here ##with as exhibit 99 . 1 to this report and shall not be deemed to be filed for purposes of section 18 of the exchange act . item 9 . 01 . financial statements and exhibits . ( d ) exhibits . signature [SEP]


I0924 21:45:40.137781 140534296721152 run_classifier.py:464] tokens: [CLS] february 14 , 2018 , fl ##ir systems , inc . ( the “ company ” ) issued a news release announcing ( i ) its financial results for the quarter and year ended december 31 , 2017 , ( ii ) outlook for revenue and earnings per share for the year ending december 31 , 2018 , and ( iii ) the declaration of a quarterly divide ##nd . the news release issued february 14 , 2018 is furnished here ##with as exhibit 99 . 1 to this report and shall not be deemed to be filed for purposes of section 18 of the exchange act . item 9 . 01 . financial statements and exhibits . ( d ) exhibits . signature [SEP]


INFO:tensorflow:input_ids: 101 2337 2403 1010 2760 1010 13109 4313 3001 1010 4297 1012 1006 1996 1523 2194 1524 1007 3843 1037 2739 2713 13856 1006 1045 1007 2049 3361 3463 2005 1996 4284 1998 2095 3092 2285 2861 1010 2418 1010 1006 2462 1007 17680 2005 6599 1998 16565 2566 3745 2005 1996 2095 4566 2285 2861 1010 2760 1010 1998 1006 3523 1007 1996 8170 1997 1037 12174 11443 4859 1012 1996 2739 2713 3843 2337 2403 1010 2760 2003 19851 2182 24415 2004 8327 5585 1012 1015 2000 2023 3189 1998 4618 2025 2022 8357 2000 2022 6406 2005 5682 1997 2930 2324 1997 1996 3863 2552 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1012 1006 1040 1007 10637 1012 8085 102 0 0


I0924 21:45:40.139335 140534296721152 run_classifier.py:465] input_ids: 101 2337 2403 1010 2760 1010 13109 4313 3001 1010 4297 1012 1006 1996 1523 2194 1524 1007 3843 1037 2739 2713 13856 1006 1045 1007 2049 3361 3463 2005 1996 4284 1998 2095 3092 2285 2861 1010 2418 1010 1006 2462 1007 17680 2005 6599 1998 16565 2566 3745 2005 1996 2095 4566 2285 2861 1010 2760 1010 1998 1006 3523 1007 1996 8170 1997 1037 12174 11443 4859 1012 1996 2739 2713 3843 2337 2403 1010 2760 2003 19851 2182 24415 2004 8327 5585 1012 1015 2000 2023 3189 1998 4618 2025 2022 8357 2000 2022 6406 2005 5682 1997 2930 2324 1997 1996 3863 2552 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1012 1006 1040 1007 10637 1012 8085 102 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0


I0924 21:45:40.140806 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.142466 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: down (id = 1)


I0924 21:45:40.144130 140534296721152 run_classifier.py:468] label: down (id = 1)


INFO:tensorflow:*** Example ***


I0924 21:45:40.147039 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:45:40.148676 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] february 7 , 2018 , the compensation committee of the board of directors of fl ##ir systems , inc . , ( the “ company ” ) approved for 2018 an increase in the base salary of james j . cannon , the company ' s chief executive officer , from $ 750 , 000 ##k to $ 850 , 000 and increased his annual incentive plan target from 100 % of base salary to 110 % of base salary . signature [SEP]


I0924 21:45:40.150290 140534296721152 run_classifier.py:464] tokens: [CLS] february 7 , 2018 , the compensation committee of the board of directors of fl ##ir systems , inc . , ( the “ company ” ) approved for 2018 an increase in the base salary of james j . cannon , the company ' s chief executive officer , from $ 750 , 000 ##k to $ 850 , 000 and increased his annual incentive plan target from 100 % of base salary to 110 % of base salary . signature [SEP]


INFO:tensorflow:input_ids: 101 2337 1021 1010 2760 1010 1996 9430 2837 1997 1996 2604 1997 5501 1997 13109 4313 3001 1010 4297 1012 1010 1006 1996 1523 2194 1524 1007 4844 2005 2760 2019 3623 1999 1996 2918 10300 1997 2508 1046 1012 8854 1010 1996 2194 1005 1055 2708 3237 2961 1010 2013 1002 9683 1010 2199 2243 2000 1002 15678 1010 2199 1998 3445 2010 3296 20438 2933 4539 2013 2531 1003 1997 2918 10300 2000 7287 1003 1997 2918 10300 1012 8085 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.151989 140534296721152 run_classifier.py:465] input_ids: 101 2337 1021 1010 2760 1010 1996 9430 2837 1997 1996 2604 1997 5501 1997 13109 4313 3001 1010 4297 1012 1010 1006 1996 1523 2194 1524 1007 4844 2005 2760 2019 3623 1999 1996 2918 10300 1997 2508 1046 1012 8854 1010 1996 2194 1005 1055 2708 3237 2961 1010 2013 1002 9683 1010 2199 2243 2000 1002 15678 1010 2199 1998 3445 2010 3296 20438 2933 4539 2013 2531 1003 1997 2918 10300 2000 7287 1003 1997 2918 10300 1012 8085 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.153507 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.155207 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: stay (id = 2)


I0924 21:45:40.156590 140534296721152 run_classifier.py:468] label: stay (id = 2)


INFO:tensorflow:*** Example ***


I0924 21:45:40.159708 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:45:40.161354 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] february 5 , 2018 , fl ##ir systems , inc . ( the “ company ” ) completed the sale of its canadian security products subsidiary , lore ##x , inc . , in a transaction that also included it ’ s small and medium - sized security products business for approximately $ 29 million in cash . the news release issued february 6 , 2018 is furnished here ##with as exhibit 99 . 1 to this report and shall not be deemed to be filed for purposes of section 18 of the exchange act . item 9 . 01 . financial statements and exhibits ( d ) exhibits . signature [SEP]


I0924 21:45:40.163070 140534296721152 run_classifier.py:464] tokens: [CLS] february 5 , 2018 , fl ##ir systems , inc . ( the “ company ” ) completed the sale of its canadian security products subsidiary , lore ##x , inc . , in a transaction that also included it ’ s small and medium - sized security products business for approximately $ 29 million in cash . the news release issued february 6 , 2018 is furnished here ##with as exhibit 99 . 1 to this report and shall not be deemed to be filed for purposes of section 18 of the exchange act . item 9 . 01 . financial statements and exhibits ( d ) exhibits . signature [SEP]


INFO:tensorflow:input_ids: 101 2337 1019 1010 2760 1010 13109 4313 3001 1010 4297 1012 1006 1996 1523 2194 1524 1007 2949 1996 5096 1997 2049 3010 3036 3688 7506 1010 19544 2595 1010 4297 1012 1010 1999 1037 12598 2008 2036 2443 2009 1521 1055 2235 1998 5396 1011 7451 3036 3688 2449 2005 3155 1002 2756 2454 1999 5356 1012 1996 2739 2713 3843 2337 1020 1010 2760 2003 19851 2182 24415 2004 8327 5585 1012 1015 2000 2023 3189 1998 4618 2025 2022 8357 2000 2022 6406 2005 5682 1997 2930 2324 1997 1996 3863 2552 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1006 1040 1007 10637 1012 8085 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.165212 140534296721152 run_classifier.py:465] input_ids: 101 2337 1019 1010 2760 1010 13109 4313 3001 1010 4297 1012 1006 1996 1523 2194 1524 1007 2949 1996 5096 1997 2049 3010 3036 3688 7506 1010 19544 2595 1010 4297 1012 1010 1999 1037 12598 2008 2036 2443 2009 1521 1055 2235 1998 5396 1011 7451 3036 3688 2449 2005 3155 1002 2756 2454 1999 5356 1012 1996 2739 2713 3843 2337 1020 1010 2760 2003 19851 2182 24415 2004 8327 5585 1012 1015 2000 2023 3189 1998 4618 2025 2022 8357 2000 2022 6406 2005 5682 1997 2930 2324 1997 1996 3863 2552 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1006 1040 1007 10637 1012 8085 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.166945 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.168599 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: stay (id = 2)


I0924 21:45:40.170177 140534296721152 run_classifier.py:468] label: stay (id = 2)


INFO:tensorflow:*** Example ***


I0924 21:45:40.174310 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:45:40.175937 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] february 9 , 2018 , pay ##pal holdings , inc . ( the “ company ” ) drew down an additional $ 1 . 5 billion under its 36 ##4 - day delayed - draw term loan credit facility ( the “ facility ” ) , which is governed by the company ’ s previously disclosed 36 ##4 - day credit and guarantee agreement , a copy of which is attached as exhibit 10 . 1 to the company ’ s form 8 - k filed with the u . s . securities and exchange commission ( the “ sec ” ) on december 6 , 2017 . the borrowing under the facility bears interest at a rate equal to 3 - month li ##bor plus [SEP]


I0924 21:45:40.177638 140534296721152 run_classifier.py:464] tokens: [CLS] february 9 , 2018 , pay ##pal holdings , inc . ( the “ company ” ) drew down an additional $ 1 . 5 billion under its 36 ##4 - day delayed - draw term loan credit facility ( the “ facility ” ) , which is governed by the company ’ s previously disclosed 36 ##4 - day credit and guarantee agreement , a copy of which is attached as exhibit 10 . 1 to the company ’ s form 8 - k filed with the u . s . securities and exchange commission ( the “ sec ” ) on december 6 , 2017 . the borrowing under the facility bears interest at a rate equal to 3 - month li ##bor plus [SEP]


INFO:tensorflow:input_ids: 101 2337 1023 1010 2760 1010 3477 12952 9583 1010 4297 1012 1006 1996 1523 2194 1524 1007 3881 2091 2019 3176 1002 1015 1012 1019 4551 2104 2049 4029 2549 1011 2154 8394 1011 4009 2744 5414 4923 4322 1006 1996 1523 4322 1524 1007 1010 2029 2003 9950 2011 1996 2194 1521 1055 3130 21362 4029 2549 1011 2154 4923 1998 11302 3820 1010 1037 6100 1997 2029 2003 4987 2004 8327 2184 1012 1015 2000 1996 2194 1521 1055 2433 1022 1011 1047 6406 2007 1996 1057 1012 1055 1012 12012 1998 3863 3222 1006 1996 1523 10819 1524 1007 2006 2285 1020 1010 2418 1012 1996 23733 2104 1996 4322 6468 3037 2012 1037 3446 5020 2000 1017 1011 3204 5622 12821 4606 102


I0924 21:45:40.179257 140534296721152 run_classifier.py:465] input_ids: 101 2337 1023 1010 2760 1010 3477 12952 9583 1010 4297 1012 1006 1996 1523 2194 1524 1007 3881 2091 2019 3176 1002 1015 1012 1019 4551 2104 2049 4029 2549 1011 2154 8394 1011 4009 2744 5414 4923 4322 1006 1996 1523 4322 1524 1007 1010 2029 2003 9950 2011 1996 2194 1521 1055 3130 21362 4029 2549 1011 2154 4923 1998 11302 3820 1010 1037 6100 1997 2029 2003 4987 2004 8327 2184 1012 1015 2000 1996 2194 1521 1055 2433 1022 1011 1047 6406 2007 1996 1057 1012 1055 1012 12012 1998 3863 3222 1006 1996 1523 10819 1524 1007 2006 2285 1020 1010 2418 1012 1996 23733 2104 1996 4322 6468 3037 2012 1037 3446 5020 2000 1017 1011 3204 5622 12821 4606 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0924 21:45:40.180946 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.182539 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: stay (id = 2)


I0924 21:45:40.184193 140534296721152 run_classifier.py:468] label: stay (id = 2)


INFO:tensorflow:*** Example ***


I0924 21:45:40.187681 140534296721152 run_classifier.py:461] *** Example ***


INFO:tensorflow:guid: None


I0924 21:45:40.189468 140534296721152 run_classifier.py:462] guid: None


INFO:tensorflow:tokens: [CLS] february 14 , 2018 , cf industries holdings , inc . issued a press release announcing its results for the quarter ended december 31 , 2017 . the press release is attached here ##to as exhibit 99 . 1 . the information set forth here ##in , including the exhibit attached here ##to , shall not be deemed " filed " for purposes of section 18 of the securities exchange act of 1934 , as amended , nor shall it be deemed incorporated by reference in any filing under the securities act of 1933 , as amended , except as shall be express ##ly set forth by specific reference in any such filing . item 9 . 01 . financial statements and exhibits . ( d [SEP]


I0924 21:45:40.191170 140534296721152 run_classifier.py:464] tokens: [CLS] february 14 , 2018 , cf industries holdings , inc . issued a press release announcing its results for the quarter ended december 31 , 2017 . the press release is attached here ##to as exhibit 99 . 1 . the information set forth here ##in , including the exhibit attached here ##to , shall not be deemed " filed " for purposes of section 18 of the securities exchange act of 1934 , as amended , nor shall it be deemed incorporated by reference in any filing under the securities act of 1933 , as amended , except as shall be express ##ly set forth by specific reference in any such filing . item 9 . 01 . financial statements and exhibits . ( d [SEP]


INFO:tensorflow:input_ids: 101 2337 2403 1010 2760 1010 12935 6088 9583 1010 4297 1012 3843 1037 2811 2713 13856 2049 3463 2005 1996 4284 3092 2285 2861 1010 2418 1012 1996 2811 2713 2003 4987 2182 3406 2004 8327 5585 1012 1015 1012 1996 2592 2275 5743 2182 2378 1010 2164 1996 8327 4987 2182 3406 1010 4618 2025 2022 8357 1000 6406 1000 2005 5682 1997 2930 2324 1997 1996 12012 3863 2552 1997 4579 1010 2004 13266 1010 4496 4618 2009 2022 8357 5100 2011 4431 1999 2151 15242 2104 1996 12012 2552 1997 4537 1010 2004 13266 1010 3272 2004 4618 2022 4671 2135 2275 5743 2011 3563 4431 1999 2151 2107 15242 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1012 1006 1040 102


I0924 21:45:40.192863 140534296721152 run_classifier.py:465] input_ids: 101 2337 2403 1010 2760 1010 12935 6088 9583 1010 4297 1012 3843 1037 2811 2713 13856 2049 3463 2005 1996 4284 3092 2285 2861 1010 2418 1012 1996 2811 2713 2003 4987 2182 3406 2004 8327 5585 1012 1015 1012 1996 2592 2275 5743 2182 2378 1010 2164 1996 8327 4987 2182 3406 1010 4618 2025 2022 8357 1000 6406 1000 2005 5682 1997 2930 2324 1997 1996 12012 3863 2552 1997 4579 1010 2004 13266 1010 4496 4618 2009 2022 8357 5100 2011 4431 1999 2151 15242 2104 1996 12012 2552 1997 4537 1010 2004 13266 1010 3272 2004 4618 2022 4671 2135 2275 5743 2011 3563 4431 1999 2151 2107 15242 1012 8875 1023 1012 5890 1012 3361 8635 1998 10637 1012 1006 1040 102


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


I0924 21:45:40.194488 140534296721152 run_classifier.py:466] input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


I0924 21:45:40.196182 140534296721152 run_classifier.py:467] segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: up (id = 0)


I0924 21:45:40.197819 140534296721152 run_classifier.py:468] label: up (id = 0)


In [43]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,num_labels):
    """Creates a classification model."""
    bert_module = hub.Module(BERT_MODEL_HUB,trainable=True)
    
    bert_inputs = dict(input_ids=input_ids,input_mask=input_mask,segment_ids=segment_ids)
    
    bert_outputs = bert_module(inputs=bert_inputs,signature="tokens",as_dict=True)
    
    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]
    
    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.get_variable(   # initialize W
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels],  # initialize b
        
        initializer=tf.zeros_initializer())
    
    # loss functiong definition 
    with tf.variable_scope("loss"):
        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1) # log交叉商损失

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        predicted_labels = tf.squeeze(tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabiltiies.
        if is_predicting:
            return (predicted_labels, log_probs)
        
        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1) # 对输入张量所有元素求和
        loss = tf.reduce_mean(per_example_loss)
        
        return (loss, predicted_labels, log_probs)

# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.
def model_fn_builder(num_labels, learning_rate, num_train_steps,num_warmup_steps):
    """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""
        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:
            (loss, predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            train_op = bert.optimization.create_optimizer(
              loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

          # Calculate evaluation metrics. 
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                return {"eval_accuracy": accuracy}

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode,
                loss=loss,
                train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode, loss=loss, eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            predictions = {'probabilities': log_probs, 'labels': predicted_labels}
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

        # Return the actual model function in the closure
    return model_fn

In [None]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 2e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where the learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

# Specify output directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig()

model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})

# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)



W0926 13:39:57.122830 140534296721152 estimator.py:1760] Using temporary folder as model directory: /tmp/tmp_rcn2l66


INFO:tensorflow:Using config: {'_model_dir': '/tmp/tmp_rcn2l66', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd00eae5be0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


I0926 13:39:57.126348 140534296721152 estimator.py:201] Using config: {'_model_dir': '/tmp/tmp_rcn2l66', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': None, '_save_checkpoints_secs': 600, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7fd00eae5be0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


Beginning Training!
INFO:tensorflow:Calling model_fn.


I0926 13:40:07.522460 140534296721152 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0926 13:40:11.165134 140534296721152 saver.py:1483] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


I0926 13:40:19.948923 140534296721152 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


I0926 13:40:19.953119 140534296721152 basic_session_run_hooks.py:527] Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


I0926 13:40:24.247374 140534296721152 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Running local_init_op.


I0926 13:40:30.862452 140534296721152 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0926 13:40:31.114963 140534296721152 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into /tmp/tmp_rcn2l66/model.ckpt.


I0926 13:40:43.668287 140534296721152 basic_session_run_hooks.py:594] Saving checkpoints for 0 into /tmp/tmp_rcn2l66/model.ckpt.


INFO:tensorflow:loss = 1.1336284, step = 1


I0926 13:41:35.388651 140534296721152 basic_session_run_hooks.py:249] loss = 1.1336284, step = 1


INFO:tensorflow:Saving checkpoints for 20 into /tmp/tmp_rcn2l66/model.ckpt.


I0926 13:51:16.255808 140534296721152 basic_session_run_hooks.py:594] Saving checkpoints for 20 into /tmp/tmp_rcn2l66/model.ckpt.


## Evaluate model on testing data

In [45]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


I0926 09:26:43.050928 140534296721152 estimator.py:1111] Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


I0926 09:26:47.351857 140534296721152 saver.py:1483] Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


I0926 09:26:56.127393 140534296721152 estimator.py:1113] Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2019-09-26T01:26:56Z


I0926 09:26:56.150754 140534296721152 evaluation.py:257] Starting evaluation at 2019-09-26T01:26:56Z


INFO:tensorflow:Graph was finalized.


I0926 09:26:57.618717 140534296721152 monitored_session.py:222] Graph was finalized.


INFO:tensorflow:Restoring parameters from /tmp/tmpg1x3i31y/model.ckpt-1645


I0926 09:26:57.621579 140534296721152 saver.py:1270] Restoring parameters from /tmp/tmpg1x3i31y/model.ckpt-1645


INFO:tensorflow:Running local_init_op.


I0926 09:27:00.196670 140534296721152 session_manager.py:491] Running local_init_op.


INFO:tensorflow:Done running local_init_op.


I0926 09:27:00.450539 140534296721152 session_manager.py:493] Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2019-09-26-01:36:15


I0926 09:36:15.369873 140534296721152 evaluation.py:277] Finished evaluation at 2019-09-26-01:36:15


INFO:tensorflow:Saving dict for global step 1645: eval_accuracy = 0.6778169, global_step = 1645, loss = 0.8704734


I0926 09:36:15.371503 140534296721152 estimator.py:1979] Saving dict for global step 1645: eval_accuracy = 0.6778169, global_step = 1645, loss = 0.8704734


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 1645: /tmp/tmpg1x3i31y/model.ckpt-1645


I0926 09:36:17.521571 140534296721152 estimator.py:2039] Saving 'checkpoint_path' summary for global step 1645: /tmp/tmpg1x3i31y/model.ckpt-1645


{'eval_accuracy': 0.6778169, 'loss': 0.8704734, 'global_step': 1645}