# BERT Melon

## Project Imports

In [1]:
from sklearn.model_selection import train_test_split
import pandas as pd
import tensorflow as tf
import tensorflow_hub as hub
from datetime import datetime

In [2]:
!pip install bert-tensorflow

Defaulting to user installation because normal site-packages is not writeable


In [3]:
import bert
from bert import run_classifier
from bert import optimization
from bert import tokenization




In [4]:
import pandas as pd
import numpy as np
from sklearn.model_selection import train_test_split

## Loading the dataset

In [5]:
df = pd.read_csv('data/agr_en_tw_gold.csv', names=['serial_no', 'tweets', 'sentiment'])
print(df.shape)
df.head(15)

(1257, 3)


Unnamed: 0,serial_no,tweets,sentiment
0,sme_1,6-0 hahahahahaha har ek pakistani ko yeh pic d...,NAG
1,sme_2,@sardanarohit :While entire nation is praying ...,OAG
2,sme_3,shoaib: wahab riaz looks like johnson. kapil d...,NAG
3,sme_4,"ramraj cottons r. ashwin southindian , chennai...",NAG
4,sme_5,#indvsuae jeet gaye... :) :) cmon starsports n...,NAG
5,sme_6,tt india 08:24 1.#indvspak 2.#mustwatchmsg 3.#...,NAG
6,sme_7,#shutdownjnu over democratic setup feeds these...,CAG
7,sme_8,umesh strikes double !!! uae 102/10 #bleedblue...,NAG
8,sme_9,bwahahahaha my last rt!!! 😂😂😂😂😂😂😂😂😂 #indvsuae,NAG
9,sme_10,Is JNU - Jihadi Nurturing University! #ShutDow...,OAG


In [6]:
df['sentiment'].unique()

array(['NAG', 'OAG', 'CAG'], dtype=object)

The aggression is categorized into different classes which are as follows - 

1. Aggression: It is a human behavior intended to harm another by verbally, physically and
psychologically. Overtly Aggressive (OAG): This class includes the following cases.
(a) Aggression shown openly with verbal attack directly pointed towards any group or
individuals.
(b) Attack commenced using abusive words or calling names or comparing in a derogatory
manner.
(c) By supporting false attack or supporting others comment.
(d) Sometimes these texts also contain indirect references.
2. Covertly Aggressive (CAG): In these attacks aggression is generally hidden and contains
sarcastic negative emotions due to its indirect nature. It can be summarized as follows.
(a) By using metaphorical words to attack an individual,nation,religion.
(b) Praising someone by criticizing group irrespective of being right or wrong.
(c) Sometimes these texts also contain direct references.
3. Non Aggressive (NAG): These statements generally lack the intention to be aggressive
and mostly used while referring to the correct facts, wishing or supporting individuals or
groups on social issues.

In [7]:
integer_mapping = {'NAG' : 0, 'OAG' : 1, 'CAG' : 2}
df = df.replace({'sentiment' : integer_mapping})

In [8]:
df['sentiment'].unique()

array([0, 1, 2])

In [9]:
train, test = train_test_split(df, test_size=0.2, random_state=42)

## Data Preprocessing

In [10]:
DATA_COLUMN = 'tweets'
LABEL_COLUMN = 'sentiment'
label_list = df['sentiment'].unique() # Use the InputExample class from BERT's run_classifier code to create examples from the data
print(label_list)

[0 1 2]


In [11]:
train_InputExamples = train.apply(lambda x: bert.run_classifier.InputExample(guid=None, # Globally unique ID for bookkeeping, unused in this example
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

test_InputExamples = test.apply(lambda x: bert.run_classifier.InputExample(guid=None, 
                                                                   text_a = x[DATA_COLUMN], 
                                                                   text_b = None, 
                                                                   label = x[LABEL_COLUMN]), axis = 1)

In [12]:
# This is a path to an uncased (all lowercase) version of BERT
BERT_MODEL_HUB = "https://tfhub.dev/google/bert_uncased_L-12_H-768_A-12/1"


def create_tokenizer_from_hub_module():
    """Get the vocab file and casing info from the Hub module."""
    with tf.Graph().as_default():
        bert_module = hub.Module(BERT_MODEL_HUB)
        tokenization_info = bert_module(
            signature="tokenization_info", as_dict=True)
        with tf.Session() as sess:
            vocab_file, do_lower_case = sess.run([tokenization_info["vocab_file"],
                                                  tokenization_info["do_lower_case"]])

    return bert.tokenization.FullTokenizer(
        vocab_file=vocab_file, do_lower_case=do_lower_case)


tokenizer = create_tokenizer_from_hub_module()

INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore








In [13]:
tokenizer.tokenize(train['tweets'].iloc[0])

['mis',
 '##bah',
 'ka',
 '##pr',
 '##aan',
 'hai',
 'jo',
 'ab',
 '##hi',
 'tak',
 'century',
 'nah',
 '##i',
 'l',
 '##ga',
 'pay',
 '##a',
 'ha',
 '##ha',
 '#',
 'ind',
 '##win',
 '##s',
 '#',
 'ind',
 '##vs',
 '##pa',
 '##k']

In [14]:
# We'll set sequences to be at most 128 tokens long.
MAX_SEQ_LENGTH = 60
# Convert our train and test features to InputFeatures that BERT understands.
train_features = bert.run_classifier.convert_examples_to_features(train_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)
test_features = bert.run_classifier.convert_examples_to_features(test_InputExamples, label_list, MAX_SEQ_LENGTH, tokenizer)







INFO:tensorflow:Writing example 0 of 1005


INFO:tensorflow:Writing example 0 of 1005


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] mis ##bah ka ##pr ##aan hai jo ab ##hi tak century nah ##i l ##ga pay ##a ha ##ha # ind ##win ##s # ind ##vs ##pa ##k [SEP]


INFO:tensorflow:tokens: [CLS] mis ##bah ka ##pr ##aan hai jo ab ##hi tak century nah ##i l ##ga pay ##a ha ##ha # ind ##win ##s # ind ##vs ##pa ##k [SEP]


INFO:tensorflow:input_ids: 101 28616 24206 10556 18098 14634 15030 8183 11113 4048 27006 2301 20976 2072 1048 3654 3477 2050 5292 3270 1001 27427 10105 2015 1001 27427 15088 4502 2243 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 28616 24206 10556 18098 14634 15030 8183 11113 4048 27006 2301 20976 2072 1048 3654 3477 2050 5292 3270 1001 27427 10105 2015 1001 27427 15088 4502 2243 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] @ b ##du ##tt @ n ##dt ##v pak ##is never claimed arresting mas ##ood . diversion ##ary tactics aka ta ##qui ##yya , to def ##le ##ct attention away from j ##nu . # shut ##down ##jn ##u [SEP]


INFO:tensorflow:tokens: [CLS] @ b ##du ##tt @ n ##dt ##v pak ##is never claimed arresting mas ##ood . diversion ##ary tactics aka ta ##qui ##yya , to def ##le ##ct attention away from j ##nu . # shut ##down ##jn ##u [SEP]


INFO:tensorflow:input_ids: 101 1030 1038 8566 4779 1030 1050 11927 2615 22190 2483 2196 3555 28427 16137 17139 1012 20150 5649 9887 9875 11937 15549 19903 1010 2000 13366 2571 6593 3086 2185 2013 1046 11231 1012 1001 3844 7698 22895 2226 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1030 1038 8566 4779 1030 1050 11927 2615 22190 2483 2196 3555 28427 16137 17139 1012 20150 5649 9887 9875 11937 15549 19903 1010 2000 13366 2571 6593 3086 2185 2013 1046 11231 1012 1001 3844 7698 22895 2226 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] @ _ yo ##gen ##dra ##yad ##av @ prakash ##kara ##t ##4 ##pm why are you quite on de ##ga ##dation of your university image # shut ##down ##jn ##u pic . twitter . com / i ##ux ##ug ##d ##wg ##d ##k [SEP]


INFO:tensorflow:tokens: [CLS] @ _ yo ##gen ##dra ##yad ##av @ prakash ##kara ##t ##4 ##pm why are you quite on de ##ga ##dation of your university image # shut ##down ##jn ##u pic . twitter . com / i ##ux ##ug ##d ##wg ##d ##k [SEP]


INFO:tensorflow:input_ids: 101 1030 1035 10930 6914 7265 25152 11431 1030 22233 16566 2102 2549 9737 2339 2024 2017 3243 2006 2139 3654 20207 1997 2115 2118 3746 1001 3844 7698 22895 2226 27263 1012 10474 1012 4012 1013 1045 5602 15916 2094 27767 2094 2243 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1030 1035 10930 6914 7265 25152 11431 1030 22233 16566 2102 2549 9737 2339 2024 2017 3243 2006 2139 3654 20207 1997 2115 2118 3746 1001 3844 7698 22895 2226 27263 1012 10474 1012 4012 1013 1045 5602 15916 2094 27767 2094 2243 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 2 (id = 2)


INFO:tensorflow:label: 2 (id = 2)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] # shut ##down ##jn ##u so called left professor s in j ##nu , pl ##s leave this nation n form a camp in po ##k . . ur services r not required . [SEP]


INFO:tensorflow:tokens: [CLS] # shut ##down ##jn ##u so called left professor s in j ##nu , pl ##s leave this nation n form a camp in po ##k . . ur services r not required . [SEP]


INFO:tensorflow:input_ids: 101 1001 3844 7698 22895 2226 2061 2170 2187 2934 1055 1999 1046 11231 1010 20228 2015 2681 2023 3842 1050 2433 1037 3409 1999 13433 2243 1012 1012 24471 2578 1054 2025 3223 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 3844 7698 22895 2226 2061 2170 2187 2934 1055 1999 1046 11231 1010 20228 2015 2681 2023 3842 1050 2433 1037 3409 1999 13433 2243 1012 1012 24471 2578 1054 2025 3223 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] no b ##ha ##j ##ji , yu ##vi , se ##hwa ##g , ga ##mb ##hir , gu ##l , aj ##mal , ha ##fe ##ez , za ##hee ##r . well it be the same # ind ##vs ##pa ##k encounter @ cricket ##aa ##kas ##h [SEP]


INFO:tensorflow:tokens: [CLS] no b ##ha ##j ##ji , yu ##vi , se ##hwa ##g , ga ##mb ##hir , gu ##l , aj ##mal , ha ##fe ##ez , za ##hee ##r . well it be the same # ind ##vs ##pa ##k encounter @ cricket ##aa ##kas ##h [SEP]


INFO:tensorflow:input_ids: 101 2053 1038 3270 3501 4478 1010 9805 5737 1010 7367 18663 2290 1010 11721 14905 11961 1010 19739 2140 1010 19128 9067 1010 5292 7959 9351 1010 23564 21030 2099 1012 2092 2009 2022 1996 2168 1001 27427 15088 4502 2243 8087 1030 4533 11057 13716 2232 102 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 2053 1038 3270 3501 4478 1010 9805 5737 1010 7367 18663 2290 1010 11721 14905 11961 1010 19739 2140 1010 19128 9067 1010 5292 7959 9351 1010 23564 21030 2099 1012 2092 2009 2022 1996 2168 1001 27427 15088 4502 2243 8087 1030 4533 11057 13716 2232 102 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:Writing example 0 of 252


INFO:tensorflow:Writing example 0 of 252


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] ja ##b sunday ko team india se pakistan tak ##ray ##ega tab stadium me sir ##f tri ##nga la ##her ##ay ##ega # cw ##c ##15 # ind ##vs ##pa ##k # world ##cup http : / / t . co / d ##f ##h ##wo ##m ##13 ##7 ##b [SEP]


INFO:tensorflow:tokens: [CLS] ja ##b sunday ko team india se pakistan tak ##ray ##ega tab stadium me sir ##f tri ##nga la ##her ##ay ##ega # cw ##c ##15 # ind ##vs ##pa ##k # world ##cup http : / / t . co / d ##f ##h ##wo ##m ##13 ##7 ##b [SEP]


INFO:tensorflow:input_ids: 101 14855 2497 4465 12849 2136 2634 7367 4501 27006 9447 29107 21628 3346 2033 2909 2546 13012 13807 2474 5886 4710 29107 1001 19296 2278 16068 1001 27427 15088 4502 2243 1001 2088 15569 8299 1024 1013 1013 1056 1012 2522 1013 1040 2546 2232 12155 2213 17134 2581 2497 102 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 14855 2497 4465 12849 2136 2634 7367 4501 27006 9447 29107 21628 3346 2033 2909 2546 13012 13807 2474 5886 4710 29107 1001 19296 2278 16068 1001 27427 15088 4502 2243 1001 2088 15569 8299 1024 1013 1013 1056 1012 2522 1013 1040 2546 2232 12155 2213 17134 2581 2497 102 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] @ na ##ren ##dra ##mo ##di # shut ##down ##jn ##u we need education but not against nation [SEP]


INFO:tensorflow:tokens: [CLS] @ na ##ren ##dra ##mo ##di # shut ##down ##jn ##u we need education but not against nation [SEP]


INFO:tensorflow:input_ids: 101 1030 6583 7389 7265 5302 4305 1001 3844 7698 22895 2226 2057 2342 2495 2021 2025 2114 3842 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1030 6583 7389 7265 5302 4305 1001 3844 7698 22895 2226 2057 2342 2495 2021 2025 2114 3842 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] # ma ##uke ##pe ##cha ##uka ! india vs uae - india wins now turn for uae ma ##uka ma ##uka stars ##port ads . # ind ##vs ##sa , # ind ##vs ##ua ##e , # cw ##c ##15 http : / / t . co / kai ##m ##va ##zi ##2 ##z [SEP]


INFO:tensorflow:tokens: [CLS] # ma ##uke ##pe ##cha ##uka ! india vs uae - india wins now turn for uae ma ##uka ma ##uka stars ##port ads . # ind ##vs ##sa , # ind ##vs ##ua ##e , # cw ##c ##15 http : / / t . co / kai ##m ##va ##zi ##2 ##z [SEP]


INFO:tensorflow:input_ids: 101 1001 5003 15851 5051 7507 15750 999 2634 5443 17641 1011 2634 5222 2085 2735 2005 17641 5003 15750 5003 15750 3340 6442 14997 1012 1001 27427 15088 3736 1010 1001 27427 15088 6692 2063 1010 1001 19296 2278 16068 8299 1024 1013 1013 1056 1012 2522 1013 11928 2213 3567 5831 2475 2480 102 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 5003 15851 5051 7507 15750 999 2634 5443 17641 1011 2634 5222 2085 2735 2005 17641 5003 15750 5003 15750 3340 6442 14997 1012 1001 27427 15088 3736 1010 1001 27427 15088 6692 2063 1010 1001 19296 2278 16068 8299 1024 1013 1013 1056 1012 2522 1013 11928 2213 3567 5831 2475 2480 102 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] # ind ##vs ##ua ##e b ##hu ##bane ##sw ##ar kumar is back and [SEP]


INFO:tensorflow:tokens: [CLS] # ind ##vs ##ua ##e b ##hu ##bane ##sw ##ar kumar is back and [SEP]


INFO:tensorflow:input_ids: 101 1001 27427 15088 6692 2063 1038 6979 27543 26760 2906 9600 2003 2067 1998 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 27427 15088 6692 2063 1038 6979 27543 26760 2906 9600 2003 2067 1998 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: None


INFO:tensorflow:guid: None


INFO:tensorflow:tokens: [CLS] # shut ##down ##jn ##u j ##nu is the nursery of anti - nationals and terrorist - lovers . keep it trend ##ing ! let these traitor ##s know , their game is over . [SEP]


INFO:tensorflow:tokens: [CLS] # shut ##down ##jn ##u j ##nu is the nursery of anti - nationals and terrorist - lovers . keep it trend ##ing ! let these traitor ##s know , their game is over . [SEP]


INFO:tensorflow:input_ids: 101 1001 3844 7698 22895 2226 1046 11231 2003 1996 13640 1997 3424 1011 10342 1998 9452 1011 10205 1012 2562 2009 9874 2075 999 2292 2122 17328 2015 2113 1010 2037 2208 2003 2058 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 3844 7698 22895 2226 1046 11231 2003 1996 13640 1997 3424 1011 10342 1998 9452 1011 10205 1012 2562 2009 9874 2075 999 2292 2122 17328 2015 2113 1010 2037 2208 2003 2058 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 1 (id = 1)


INFO:tensorflow:label: 1 (id = 1)


## BERT Model

In [16]:
def create_model(is_predicting, input_ids, input_mask, segment_ids, labels,
                 num_labels):
    """Creates a classification model."""

    bert_module = hub.Module(
        BERT_MODEL_HUB,
        trainable=True)
    bert_inputs = dict(
        input_ids=input_ids,
        input_mask=input_mask,
        segment_ids=segment_ids)
    bert_outputs = bert_module(
        inputs=bert_inputs,
        signature="tokens",
        as_dict=True)

    # Use "pooled_output" for classification tasks on an entire sentence.
    # Use "sequence_outputs" for token-level output.
    output_layer = bert_outputs["pooled_output"]

    hidden_size = output_layer.shape[-1].value

    # Create our own layer to tune for politeness data.
    output_weights = tf.get_variable(
        "output_weights", [num_labels, hidden_size],
        initializer=tf.truncated_normal_initializer(stddev=0.02))

    output_bias = tf.get_variable(
        "output_bias", [num_labels], initializer=tf.zeros_initializer())

    with tf.variable_scope("loss"):

        # Dropout helps prevent overfitting
        output_layer = tf.nn.dropout(output_layer, keep_prob=0.9)

        logits = tf.matmul(output_layer, output_weights, transpose_b=True)
        logits = tf.nn.bias_add(logits, output_bias)
        log_probs = tf.nn.log_softmax(logits, axis=-1)

        # Convert labels into one-hot encoding
        one_hot_labels = tf.one_hot(labels, depth=num_labels, dtype=tf.float32)

        predicted_labels = tf.squeeze(
            tf.argmax(log_probs, axis=-1, output_type=tf.int32))
        # If we're predicting, we want predicted labels and the probabiltiies.
        if is_predicting:
            return (predicted_labels, log_probs)

        # If we're train/eval, compute loss between predicted and actual label
        per_example_loss = -tf.reduce_sum(one_hot_labels * log_probs, axis=-1)
        loss = tf.reduce_mean(per_example_loss)
        return (loss, predicted_labels, log_probs)


In [17]:
# model_fn_builder actually creates our model function
# using the passed parameters for num_labels, learning_rate, etc.


def model_fn_builder(num_labels, learning_rate, num_train_steps,
                     num_warmup_steps):
    """Returns `model_fn` closure for TPUEstimator."""
    def model_fn(features, labels, mode, params):  # pylint: disable=unused-argument
        """The `model_fn` for TPUEstimator."""

        input_ids = features["input_ids"]
        input_mask = features["input_mask"]
        segment_ids = features["segment_ids"]
        label_ids = features["label_ids"]

        is_predicting = (mode == tf.estimator.ModeKeys.PREDICT)

        # TRAIN and EVAL
        if not is_predicting:

            (loss, predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            train_op = bert.optimization.create_optimizer(
                loss, learning_rate, num_train_steps, num_warmup_steps, use_tpu=False)

            # Calculate evaluation metrics.
            def metric_fn(label_ids, predicted_labels):
                accuracy = tf.metrics.accuracy(label_ids, predicted_labels)
                true_pos = tf.metrics.true_positives(
                    label_ids,
                    predicted_labels)
                true_neg = tf.metrics.true_negatives(
                    label_ids,
                    predicted_labels)
                false_pos = tf.metrics.false_positives(
                    label_ids,
                    predicted_labels)
                false_neg = tf.metrics.false_negatives(
                    label_ids,
                    predicted_labels)
                return {
                    "eval_accuracy": accuracy,
                    "true_positives": true_pos,
                    "true_negatives": true_neg,
                    "false_positives": false_pos,
                    "false_negatives": false_neg
                }

            eval_metrics = metric_fn(label_ids, predicted_labels)

            if mode == tf.estimator.ModeKeys.TRAIN:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  train_op=train_op)
            else:
                return tf.estimator.EstimatorSpec(mode=mode,
                                                  loss=loss,
                                                  eval_metric_ops=eval_metrics)
        else:
            (predicted_labels, log_probs) = create_model(
                is_predicting, input_ids, input_mask, segment_ids, label_ids, num_labels)

            predictions = {
                'probabilities': log_probs,
                'labels': predicted_labels
            }
            return tf.estimator.EstimatorSpec(mode, predictions=predictions)

    # Return the actual model function in the closure
    return model_fn

In [18]:
# Compute train and warmup steps from batch size
# These hyperparameters are copied from this colab notebook (https://colab.sandbox.google.com/github/tensorflow/tpu/blob/master/tools/colab/bert_finetuning_with_cloud_tpus.ipynb)
BATCH_SIZE = 32
LEARNING_RATE = 1e-5
NUM_TRAIN_EPOCHS = 3.0
# Warmup is a period of time where hte learning rate 
# is small and gradually increases--usually helps training.
WARMUP_PROPORTION = 0.1
# Model configs
SAVE_CHECKPOINTS_STEPS = 500
SAVE_SUMMARY_STEPS = 100

In [19]:
# Compute # train and warmup steps from batch size
num_train_steps = int(len(train_features) / BATCH_SIZE * NUM_TRAIN_EPOCHS)
num_warmup_steps = int(num_train_steps * WARMUP_PROPORTION)

In [20]:
OUTPUT_DIR = 'model/'

In [21]:
# Specify outpit directory and number of checkpoint steps to save
run_config = tf.estimator.RunConfig(
    model_dir=OUTPUT_DIR,
    save_summary_steps=SAVE_SUMMARY_STEPS,
    save_checkpoints_steps=SAVE_CHECKPOINTS_STEPS)

In [22]:
model_fn = model_fn_builder(
  num_labels=len(label_list),
  learning_rate=LEARNING_RATE,
  num_train_steps=num_train_steps,
  num_warmup_steps=num_warmup_steps)

estimator = tf.estimator.Estimator(
  model_fn=model_fn,
  config=run_config,
  params={"batch_size": BATCH_SIZE})

INFO:tensorflow:Using config: {'_model_dir': 'model/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f71fb1815c0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


INFO:tensorflow:Using config: {'_model_dir': 'model/', '_tf_random_seed': None, '_save_summary_steps': 100, '_save_checkpoints_steps': 500, '_save_checkpoints_secs': None, '_session_config': allow_soft_placement: true
graph_options {
  rewrite_options {
    meta_optimizer_iterations: ONE
  }
}
, '_keep_checkpoint_max': 5, '_keep_checkpoint_every_n_hours': 10000, '_log_step_count_steps': 100, '_train_distribute': None, '_device_fn': None, '_protocol': None, '_eval_distribute': None, '_experimental_distribute': None, '_experimental_max_worker_delay_secs': None, '_session_creation_timeout_secs': 7200, '_service': None, '_cluster_spec': <tensorflow.python.training.server_lib.ClusterSpec object at 0x7f71fb1815c0>, '_task_type': 'worker', '_task_id': 0, '_global_id_in_cluster': 0, '_master': '', '_evaluation_master': '', '_is_chief': True, '_num_ps_replicas': 0, '_num_worker_replicas': 1}


In [23]:
# Create an input function for training. drop_remainder = True for using TPUs.
train_input_fn = bert.run_classifier.input_fn_builder(
    features=train_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=True,
    drop_remainder=False)

In [24]:
print(f'Beginning Training!')
current_time = datetime.now()
estimator.train(input_fn=train_input_fn, max_steps=num_train_steps)
print("Training took time ", datetime.now() - current_time)

Beginning Training!
Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


Instructions for updating:
Use Variable.read_value. Variables in 2.X are initialized automatically both in eager and graph (inside tf.defun) contexts.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.


Instructions for updating:
Please use `rate` instead of `keep_prob`. Rate should be set to `rate = 1 - keep_prob`.




















Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where


Instructions for updating:
Use tf.where in 2.0, which has the same broadcast rule as np.where
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Create CheckpointSaverHook.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Saving checkpoints for 0 into model/model.ckpt.


INFO:tensorflow:Saving checkpoints for 0 into model/model.ckpt.


INFO:tensorflow:loss = 1.1214805, step = 1


INFO:tensorflow:loss = 1.1214805, step = 1






















INFO:tensorflow:Saving checkpoints for 94 into model/model.ckpt.


INFO:tensorflow:Saving checkpoints for 94 into model/model.ckpt.


INFO:tensorflow:Loss for final step: 0.48558074.


INFO:tensorflow:Loss for final step: 0.48558074.


Training took time  0:06:52.361101


In [25]:
test_input_fn = run_classifier.input_fn_builder(
    features=test_features,
    seq_length=MAX_SEQ_LENGTH,
    is_training=False,
    drop_remainder=False)

In [26]:
estimator.evaluate(input_fn=test_input_fn, steps=None)

INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore
  "Converting sparse IndexedSlices to a dense Tensor of unknown shape. "


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Starting evaluation at 2020-03-07T20:05:28Z


INFO:tensorflow:Starting evaluation at 2020-03-07T20:05:28Z


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from model/model.ckpt-94


INFO:tensorflow:Restoring parameters from model/model.ckpt-94


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Finished evaluation at 2020-03-07-20:05:41


INFO:tensorflow:Finished evaluation at 2020-03-07-20:05:41


INFO:tensorflow:Saving dict for global step 94: eval_accuracy = 0.6944444, false_negatives = 11.0, false_positives = 11.0, global_step = 94, loss = 0.5937655, true_negatives = 90.0, true_positives = 140.0


INFO:tensorflow:Saving dict for global step 94: eval_accuracy = 0.6944444, false_negatives = 11.0, false_positives = 11.0, global_step = 94, loss = 0.5937655, true_negatives = 90.0, true_positives = 140.0


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 94: model/model.ckpt-94


INFO:tensorflow:Saving 'checkpoint_path' summary for global step 94: model/model.ckpt-94


{'eval_accuracy': 0.6944444,
 'false_negatives': 11.0,
 'false_positives': 11.0,
 'loss': 0.5937655,
 'true_negatives': 90.0,
 'true_positives': 140.0,
 'global_step': 94}

In [27]:
def getPrediction(in_sentences):
    labels = ['NAG', 'OAG', 'CAG']
    input_examples = [run_classifier.InputExample(
        guid="", text_a=x, text_b=None, label=0) for x in in_sentences]  # here, "" is just a dummy label
    input_features = run_classifier.convert_examples_to_features(
        input_examples, label_list, MAX_SEQ_LENGTH, tokenizer)
    predict_input_fn = run_classifier.input_fn_builder(
        features=input_features, seq_length=MAX_SEQ_LENGTH, is_training=False, drop_remainder=False)
    predictions = estimator.predict(predict_input_fn)
    return [(sentence, prediction['probabilities'], labels[prediction['labels']]) for sentence, prediction in zip(in_sentences, predictions)]


In [28]:
pred_sentences = list(test['tweets'])
type(pred_sentences)

list

In [29]:
predictions = getPrediction(pred_sentences)

INFO:tensorflow:Writing example 0 of 252


INFO:tensorflow:Writing example 0 of 252


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] ja ##b sunday ko team india se pakistan tak ##ray ##ega tab stadium me sir ##f tri ##nga la ##her ##ay ##ega # cw ##c ##15 # ind ##vs ##pa ##k # world ##cup http : / / t . co / d ##f ##h ##wo ##m ##13 ##7 ##b [SEP]


INFO:tensorflow:tokens: [CLS] ja ##b sunday ko team india se pakistan tak ##ray ##ega tab stadium me sir ##f tri ##nga la ##her ##ay ##ega # cw ##c ##15 # ind ##vs ##pa ##k # world ##cup http : / / t . co / d ##f ##h ##wo ##m ##13 ##7 ##b [SEP]


INFO:tensorflow:input_ids: 101 14855 2497 4465 12849 2136 2634 7367 4501 27006 9447 29107 21628 3346 2033 2909 2546 13012 13807 2474 5886 4710 29107 1001 19296 2278 16068 1001 27427 15088 4502 2243 1001 2088 15569 8299 1024 1013 1013 1056 1012 2522 1013 1040 2546 2232 12155 2213 17134 2581 2497 102 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 14855 2497 4465 12849 2136 2634 7367 4501 27006 9447 29107 21628 3346 2033 2909 2546 13012 13807 2474 5886 4710 29107 1001 19296 2278 16068 1001 27427 15088 4502 2243 1001 2088 15569 8299 1024 1013 1013 1056 1012 2522 1013 1040 2546 2232 12155 2213 17134 2581 2497 102 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] @ na ##ren ##dra ##mo ##di # shut ##down ##jn ##u we need education but not against nation [SEP]


INFO:tensorflow:tokens: [CLS] @ na ##ren ##dra ##mo ##di # shut ##down ##jn ##u we need education but not against nation [SEP]


INFO:tensorflow:input_ids: 101 1030 6583 7389 7265 5302 4305 1001 3844 7698 22895 2226 2057 2342 2495 2021 2025 2114 3842 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1030 6583 7389 7265 5302 4305 1001 3844 7698 22895 2226 2057 2342 2495 2021 2025 2114 3842 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] # ma ##uke ##pe ##cha ##uka ! india vs uae - india wins now turn for uae ma ##uka ma ##uka stars ##port ads . # ind ##vs ##sa , # ind ##vs ##ua ##e , # cw ##c ##15 http : / / t . co / kai ##m ##va ##zi ##2 ##z [SEP]


INFO:tensorflow:tokens: [CLS] # ma ##uke ##pe ##cha ##uka ! india vs uae - india wins now turn for uae ma ##uka ma ##uka stars ##port ads . # ind ##vs ##sa , # ind ##vs ##ua ##e , # cw ##c ##15 http : / / t . co / kai ##m ##va ##zi ##2 ##z [SEP]


INFO:tensorflow:input_ids: 101 1001 5003 15851 5051 7507 15750 999 2634 5443 17641 1011 2634 5222 2085 2735 2005 17641 5003 15750 5003 15750 3340 6442 14997 1012 1001 27427 15088 3736 1010 1001 27427 15088 6692 2063 1010 1001 19296 2278 16068 8299 1024 1013 1013 1056 1012 2522 1013 11928 2213 3567 5831 2475 2480 102 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 5003 15851 5051 7507 15750 999 2634 5443 17641 1011 2634 5222 2085 2735 2005 17641 5003 15750 5003 15750 3340 6442 14997 1012 1001 27427 15088 3736 1010 1001 27427 15088 6692 2063 1010 1001 19296 2278 16068 8299 1024 1013 1013 1056 1012 2522 1013 11928 2213 3567 5831 2475 2480 102 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] # ind ##vs ##ua ##e b ##hu ##bane ##sw ##ar kumar is back and [SEP]


INFO:tensorflow:tokens: [CLS] # ind ##vs ##ua ##e b ##hu ##bane ##sw ##ar kumar is back and [SEP]


INFO:tensorflow:input_ids: 101 1001 27427 15088 6692 2063 1038 6979 27543 26760 2906 9600 2003 2067 1998 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 27427 15088 6692 2063 1038 6979 27543 26760 2906 9600 2003 2067 1998 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:*** Example ***


INFO:tensorflow:*** Example ***


INFO:tensorflow:guid: 


INFO:tensorflow:guid: 


INFO:tensorflow:tokens: [CLS] # shut ##down ##jn ##u j ##nu is the nursery of anti - nationals and terrorist - lovers . keep it trend ##ing ! let these traitor ##s know , their game is over . [SEP]


INFO:tensorflow:tokens: [CLS] # shut ##down ##jn ##u j ##nu is the nursery of anti - nationals and terrorist - lovers . keep it trend ##ing ! let these traitor ##s know , their game is over . [SEP]


INFO:tensorflow:input_ids: 101 1001 3844 7698 22895 2226 1046 11231 2003 1996 13640 1997 3424 1011 10342 1998 9452 1011 10205 1012 2562 2009 9874 2075 999 2292 2122 17328 2015 2113 1010 2037 2208 2003 2058 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_ids: 101 1001 3844 7698 22895 2226 1046 11231 2003 1996 13640 1997 3424 1011 10342 1998 9452 1011 10205 1012 2562 2009 9874 2075 999 2292 2122 17328 2015 2113 1010 2037 2208 2003 2058 1012 102 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:input_mask: 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 1 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:segment_ids: 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0 0


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:label: 0 (id = 0)


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Calling model_fn.


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Saver not created because there are no variables in the graph to restore


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Done calling model_fn.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Graph was finalized.


INFO:tensorflow:Restoring parameters from model/model.ckpt-94


INFO:tensorflow:Restoring parameters from model/model.ckpt-94


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Running local_init_op.


INFO:tensorflow:Done running local_init_op.


INFO:tensorflow:Done running local_init_op.


In [30]:
predictions

[('jab sunday ko team india se pakistan takrayega tab stadium me sirf tringa laherayega #cwc15 #indvspak #worldcup http://t.co/dfhwom137b',
  array([-0.07745104, -4.3284955 , -2.7913182 ], dtype=float32),
  'NAG'),
 ('@narendramodi #ShutDownJNU we need education but not against nation',
  array([-4.5315466 , -0.57805234, -0.8480573 ], dtype=float32),
  'OAG'),
 ('#maukepechauka! india vs uae - india wins now turn for uae mauka mauka starsport ads. #indvssa, #indvsuae, #cwc15 http://t.co/kaimvazi2z',
  array([-0.12864865, -4.393031  , -2.2223516 ], dtype=float32),
  'NAG'),
 ('#indvsuae bhubaneswar kumar is back and',
  array([-0.6535944, -2.6725497, -0.8897672], dtype=float32),
  'NAG'),
 ('#ShutDownJNU JNU IS THE NURSERY OF ANTI-NATIONALS AND TERRORIST-LOVERS. KEEP IT TRENDING! LET THESE TRAITORS KNOW, THEIR GAME IS OVER.',
  array([-4.587813 , -0.3960981, -1.149219 ], dtype=float32),
  'OAG'),
 ('They are fasting unto death! #StandwithJNU #StandforJNU #WithyouJNU http:// fb.me/7Qx6LZ

In [31]:
x = np.array(predictions)
np.shape(x)

(252, 3)

In [32]:
x

array([['jab sunday ko team india se pakistan takrayega tab stadium me sirf tringa laherayega #cwc15 #indvspak #worldcup http://t.co/dfhwom137b',
        array([-0.07745104, -4.3284955 , -2.7913182 ], dtype=float32),
        'NAG'],
       ['@narendramodi #ShutDownJNU we need education but not against nation',
        array([-4.5315466 , -0.57805234, -0.8480573 ], dtype=float32),
        'OAG'],
       ['#maukepechauka! india vs uae - india wins now turn for uae mauka mauka starsport ads. #indvssa, #indvsuae, #cwc15 http://t.co/kaimvazi2z',
        array([-0.12864865, -4.393031  , -2.2223516 ], dtype=float32),
        'NAG'],
       ['#indvsuae bhubaneswar kumar is back and',
        array([-0.6535944, -2.6725497, -0.8897672], dtype=float32),
        'NAG'],
       ['#ShutDownJNU JNU IS THE NURSERY OF ANTI-NATIONALS AND TERRORIST-LOVERS. KEEP IT TRENDING! LET THESE TRAITORS KNOW, THEIR GAME IS OVER.',
        array([-4.587813 , -0.3960981, -1.149219 ], dtype=float32),
        'OAG'],
 

In [33]:
tweets = []
predicted_sentiment = []
for i in range(len(predictions)):
    tweets.append(predictions[i][0])
    predicted_sentiment.append(predictions[i][2])

In [37]:
output_dict = {'tweets' : tweets, 'predicted_sentiment' : predicted_sentiment}

In [39]:
!mkdir output

BERTMelon.ipynb  data  model  output  README.md


In [40]:
pd.DataFrame(output_dict).to_excel('output/output_bert.xlsx', index = False)