# 1. Initial Setup
Downloading dependencies and setting up the notebook

## 1.1 Install Tensorflow 2.0

In [1]:
!pip uninstall tensorflow
!pip install tensorflow

Uninstalling tensorflow-1.15.0:
  Would remove:
    /usr/local/bin/estimator_ckpt_converter
    /usr/local/bin/freeze_graph
    /usr/local/bin/saved_model_cli
    /usr/local/bin/tensorboard
    /usr/local/bin/tf_upgrade_v2
    /usr/local/bin/tflite_convert
    /usr/local/bin/toco
    /usr/local/bin/toco_from_protos
    /usr/local/lib/python3.6/dist-packages/tensorflow-1.15.0.dist-info/*
    /usr/local/lib/python3.6/dist-packages/tensorflow/*
    /usr/local/lib/python3.6/dist-packages/tensorflow_core/*
Proceed (y/n)? y
  Successfully uninstalled tensorflow-1.15.0
Collecting tensorflow
[?25l  Downloading https://files.pythonhosted.org/packages/85/d4/c0cd1057b331bc38b65478302114194bd8e1b9c2bbc06e300935c0e93d90/tensorflow-2.1.0-cp36-cp36m-manylinux2010_x86_64.whl (421.8MB)
[K     |████████████████████████████████| 421.8MB 40kB/s 
Collecting tensorflow-estimator<2.2.0,>=2.1.0rc0
[?25l  Downloading https://files.pythonhosted.org/packages/18/90/b77c328a1304437ab1310b463e533fa7689f4bfc41549

## 1.2 Downloading External Libraries

In [2]:
!pip install transformers #HuggingFace implementation of BERT model
!pip install eli5 #eli5 library for model explaining

Collecting transformers
[?25l  Downloading https://files.pythonhosted.org/packages/13/33/ffb67897a6985a7b7d8e5e7878c3628678f553634bd3836404fef06ef19b/transformers-2.5.1-py3-none-any.whl (499kB)
[K     |████████████████████████████████| 501kB 9.3MB/s 
Collecting sentencepiece
[?25l  Downloading https://files.pythonhosted.org/packages/74/f4/2d5214cbf13d06e7cb2c20d84115ca25b53ea76fa1f0ade0e3c9749de214/sentencepiece-0.1.85-cp36-cp36m-manylinux1_x86_64.whl (1.0MB)
[K     |████████████████████████████████| 1.0MB 55.4MB/s 
Collecting sacremoses
[?25l  Downloading https://files.pythonhosted.org/packages/a6/b4/7a41d630547a4afd58143597d5a49e07bfd4c42914d8335b2a5657efc14b/sacremoses-0.0.38.tar.gz (860kB)
[K     |████████████████████████████████| 870kB 59.9MB/s 
Collecting tokenizers==0.5.2
[?25l  Downloading https://files.pythonhosted.org/packages/d1/3f/73c881ea4723e43c1e9acf317cf407fab3a278daab3a69c98dcac511c04f/tokenizers-0.5.2-cp36-cp36m-manylinux1_x86_64.whl (3.7MB)
[K     |██████████

## 1.3 Imports

In [3]:
import pandas as pd
import numpy as np
import tensorflow as tf
from tensorflow.keras.layers import Bidirectional
import transformers
import matplotlib.pyplot as plt
import re
import tensorflow_hub as hub

from sklearn.metrics import classification_report, f1_score, precision_score, recall_score, accuracy_score
from sklearn.model_selection import KFold
from sklearn.model_selection import train_test_split
from eli5.lime import TextExplainer


# Make sure we start with a clear graph
tf.keras.backend.clear_session()

Using TensorFlow backend.


## 1.4 Check to see if we have GPU available.

In [4]:
# Get the GPU device name.
device_name = tf.test.gpu_device_name()

# The device name should look like the following:
if device_name == '/device:GPU:0':
    print('Found GPU at: {}'.format(device_name))
else:
    raise SystemError('GPU device not found')

Found GPU at: /device:GPU:0


## 1.5 Mount google drive to notebook

In [5]:
from google.colab import drive
drive.mount('/content/drive')

Drive already mounted at /content/drive; to attempt to forcibly remount, call drive.mount("/content/drive", force_remount=True).


## 1.6 Load in Dataset

In [0]:
sample_submission = pd.read_csv("/content/drive/My Drive/real_or_not/sample_submission.csv")
test = pd.read_csv("/content/drive/My Drive/real_or_not/test.csv")
train = pd.read_csv("/content/drive/My Drive/real_or_not/train.csv")

In [7]:
#351 has hyperlink
train.head()

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1


# 2. Text Cleaning (might do more later)

In [0]:
def remove_URL(text):
    url = re.compile(r'https?://\S+|www\.\S+')
    return url.sub(r'',text)

def remove_emoji(text):
    emoji_pattern = re.compile("["u"\U0001F600-\U0001F64F"  # emoticons
                               u"\U0001F300-\U0001F5FF"  # symbols & pictographs
                               u"\U0001F680-\U0001F6FF"  # transport & map symbols
                               u"\U0001F1E0-\U0001F1FF"  # flags (iOS)
                               u"\U00002702-\U000027B0"
                               u"\U000024C2-\U0001F251"
                               "]+", flags=re.UNICODE)
        
    return emoji_pattern.sub(r'', text)

def remove_atsigns(text):
    text = re.sub(r'@+\w+', '', text)
    return text

def remove_special_chars(text):
    text = re.sub(r"\x89Û_", "", text)
    text = re.sub(r"\x89ÛÒ", "", text)
    text = re.sub(r"\x89ÛÓ", "", text)
    text = re.sub(r"\x89ÛÏWhen", "When", text)
    text = re.sub(r"\x89ÛÏ", "", text)
    text = re.sub(r"China\x89Ûªs", "China's", text)
    text = re.sub(r"let\x89Ûªs", "let's", text)
    text = re.sub(r"\x89Û÷", "", text)
    text = re.sub(r"\x89Ûª", "", text)
    text = re.sub(r"\x89Û\x9d", "", text)
    text = re.sub(r"å_", "", text)
    text = re.sub(r"\x89Û¢", "", text)
    text = re.sub(r"\x89Û¢åÊ", "", text)
    text = re.sub(r"fromåÊwounds", "from wounds", text)
    text = re.sub(r"åÊ", "", text)
    text = re.sub(r"åÈ", "", text)
    text = re.sub(r"JapÌ_n", "Japan", text)    
    text = re.sub(r"Ì©", "e", text)
    text = re.sub(r"å¨", "", text)
    text = re.sub(r"SuruÌ¤", "Suruc", text)
    text = re.sub(r"åÇ", "", text)
    text = re.sub(r"å£3million", "3 million", text)
    text = re.sub(r"åÀ", "", text)
    text = re.sub(r"&gt;", ">", text)
    text = re.sub(r"&lt;", "<", text)
    text = re.sub(r"&amp;", "&", text)
    text = re.sub(r"\n", " ", text)
    return text

def clean_repeating_punctuations(text):
    text = re.sub(r"\?{2,}","?", text)
    text = re.sub(r"!{2,}", "!", text)
    text = re.sub(r"\.{2,}", "...", text)
    return text

def expand_contractions(text):
    text = re.sub(r"\b(c|C)an't\b", 'can not', text)
    text = re.sub(r"\b(y|Y)ou're\b", 'you are', text)
    text = re.sub(r"\b(i|I)'ll\b", 'I will', text)
    text = re.sub(r"\b(s|S)houldn't\b", 'should not', text)
    text = re.sub(r"\b(w|W)ouldn't\b", 'would not', text)
    text = re.sub(r"\b(h|H)ere's\b", 'here is', text)
    text = re.sub(r"\b(i|I)t'll", 'it will', text)
    text = re.sub(r"\b(w|W)e'll\b", 'we will', text)
    text = re.sub(r"\b(t|T)hat's\b", 'that is', text)
    text = re.sub(r"\b(w|W)e're\b", 'we are', text)
    text = re.sub(r"\b(t|T)here's\b", 'there is', text)
    return text

def translate_slang(text):
    text = re.sub(r"\b(l|L)+(o|O|l|L)+(l|L)+\b", 'laugh out loud', text)
    text = re.sub(r"\b(r|R)(o|O)(f|F)(l|L)\b", 'rolling on the floor laughing', text)
    text = re.sub(r"\b(s|S)(m|M)(h|H)\b", 'shake my head', text)
    text = re.sub(r"\b(r|R)\.?(i|I)\.?(p|P)\b", 'rest in peace', text)
    text = re.sub(r"\b(o|O)(m|M)(g|G)\b", 'oh my god', text)
    text = re.sub(r"\b(w|W)(t|T)(f|F)+\b", "what the fuck", text)
    text = re.sub(r"\b(w|W)(t|T)(h|H)+\b", "what the hell", text)
    text = re.sub(r"\b(f|F)(k|K)\b", "fuck", text)
    return text

train['text'] = train['text'].apply(lambda x : x.lower())
train['text'] = train['text'].apply(lambda x : x.rstrip())
train['text'] = train['text'].apply(lambda x : remove_URL(x))
train['text'] = train['text'].apply(lambda x : remove_emoji(x))
train['text'] = train['text'].apply(lambda x : remove_atsigns(x))
train['text'] = train['text'].apply(lambda x : remove_special_chars(x))
train['text'] = train['text'].apply(lambda x : clean_repeating_punctuations(x))
train['text'] = train['text'].apply(lambda x : expand_contractions(x))
train['text'] = train['text'].apply(lambda x : translate_slang(x))

# 3. Building BERT Models:
In this section we will be building some deep learning models all having a BERT base. Ranked from least complex to most comples, they are as follows:
1. Simple BERT for sequence classification
2. BERT + non-linear layers
3. BERT + bi-lstm

**NOTES** Fine-Tuning of the entire BERT model leads to major overfitting. Maybe it is best to unfreeze only the top couple of layers?

In [0]:
# Custom keras callback so we can measure f1_score
class BertF1Callback(tf.keras.callbacks.Callback):
    def __init__(self, input_ids_train, input_ids_val, 
                 attention_masks_train, attention_masks_val,
                 input_type_ids_train, input_type_ids_val,
                 y_train, y_val, model_type):
      
        super(tf.keras.callbacks.Callback, self).__init__()
        self.input_ids_train = input_ids_train
        self.input_ids_val = input_ids_val
        self.attention_masks_train = attention_masks_train
        self.attention_masks_val = attention_masks_val
        self.input_type_ids_train = input_type_ids_train
        self.input_type_ids_val = input_type_ids_val
        self.y_train = np.argmax(y_train, axis=1)
        self.y_val = np.argmax(y_val, axis=1)
        self.model_type = model_type
        self.best_f1_score = 0

    def on_epoch_end(self, epoch, logs = {}):
        train_predictions = self.model.predict([self.input_ids_train,
                                                self.attention_masks_train,
                                                self.input_type_ids_train])
        #Change probabilities to binary labels (MIGHT CHANGE LATER)
        train_predictions = np.argmax(train_predictions, axis=1)
        train_f1 = f1_score(self.y_train, train_predictions, average = 'macro')
        train_acc = accuracy_score(self.y_train, train_predictions)

        val_predictions = self.model.predict([self.input_ids_val,
                                              self.attention_masks_val,
                                              self.input_type_ids_val])
        #Change probabilities to binary labels (MIGHT CHANGE LATER)
        val_predictions = np.argmax(val_predictions, axis=1)

        val_f1 = f1_score(self.y_val, val_predictions, average = 'macro')

        val_acc = accuracy_score(self.y_val, val_predictions)

        print('\nEpoch = {}  train_acc = {:.6}  train_f1 = {:.6} val_acc = {:.6}  val_f1 = {:.6}\n'.format(epoch+1, train_acc, train_f1, val_acc, val_f1))

        if val_f1 > self.best_f1_score:
            self.best_f1_score = val_f1
            print("Saving Model...")
            self.model.save_weights('/content/drive/My Drive/real_or_not/{}_weights.h5'.format(self.model_type))


In [0]:
class BertModel():
    '''
    model type defines which model to create:
        simple = Simple bert for sequence classification
        bert_nn = Bert + non-linear layers
        bert_lstm = bert + bi-lstm
    '''
    def __init__(self, max_len = 256, model_type = 'simple', fine_tune = True, dropout = 0.5):
        self.max_len = max_len
        self.model_type = model_type
        self.fine_tune = fine_tune
        self.log = {}
        self.tokenizer = transformers.BertTokenizer.from_pretrained('bert-base-uncased')
        self.model = self.build_model(max_len, dropout)
        
    def build_model(self, max_len, dropout = 0.2):
        # BERT takes 3 inputs:
        #    input_ids - a list of int which acts like bag of words
        #    attention_masks - a list of 1 or 0, specifies which words the model should pay attention to.
        #    input_type_ids - a list of all 0's for our use case. It's needed for other applications like question answering
        input_ids = tf.keras.layers.Input(shape = (max_len, ), dtype = 'int32', name = 'input_ids')
        attention_masks = tf.keras.layers.Input(shape = (max_len, ), dtype = 'int32', name = 'attention_masks')
        input_type_ids = tf.keras.layers.Input(shape = (max_len, ), dtype = 'int32', name = 'input_type_ids')
        
        # BERT Layer
        bert_layer = transformers.TFBertModel.from_pretrained('bert-base-uncased')
        
        if not self.fine_tune:
            bert_layer.trainable = False

        else:
            #Enable training for last 3 transformer layers
            bert_layer.trainable = False
            for w in bert_layer.weights[-66:]:
                w._trainable = True
        
        seq_output, _ = bert_layer([input_ids, attention_masks, input_type_ids])
        out = tf.keras.layers.GlobalAveragePooling1D()(seq_output)
        
        if self.model_type == 'simple':    
            out = tf.keras.layers.Dense(2, activation='softmax')(out)
            
        if self.model_type == 'bert_nn':
            out = tf.keras.layers.Dense(256, activation='relu')(out)
            out = tf.keras.layers.Dense(128, activation='relu')(out)
            out = tf.keras.layers.Dropout(dropout)(out)
            out = tf.keras.layers.Dense(2, activation='softmax')(out)
            
        if self.model_type == 'bert_lstm':
            out = tf.keras.layers.Dropout(dropout)(seq_output)
            out = tf.keras.layers.Conv1D(64, 5, padding='valid', activation='relu', strides=1)(out)
            out = tf.keras.layers.MaxPooling1D(pool_size=4)(out)
            out = Bidirectional(tf.keras.layers.LSTM(64))(out)
            out = tf.keras.layers.Dense(2, activation='softmax')(out)
        
        optim = tf.keras.optimizers.Adam(learning_rate = 3e-5)
        model = tf.keras.models.Model(inputs = [input_ids, attention_masks, input_type_ids], outputs = out)
        model.compile(optimizer = optim, loss = 'categorical_crossentropy', metrics = ['accuracy'])
        return model
    
    def fit(self, texts, labels, n_split = 5, epochs = 3, batch_size = 16, cross_validate = True, val_data = ()):
        input_ids, attention_masks, input_type_ids, y = self.bert_decode(texts, labels)

        if cross_validate:
            kfold = KFold(n_splits = n_split, shuffle = True, random_state = 2020)
            fold = 1

            for train_index, val_index in kfold.split(y):

                print("================== FOLD {} ======================".format(fold))

                # Free up some GPU memory so we dont OOM
                tf.keras.backend.clear_session()

                # Split data train/val
                input_ids_train, input_ids_val = input_ids[train_index], input_ids[val_index]
                attention_masks_train, attention_masks_val = attention_masks[train_index], attention_masks[val_index]
                input_type_ids_train, input_type_ids_val = input_type_ids[train_index], input_type_ids[val_index]
                y_train, y_val = y[train_index], y[val_index]

                # Custom f1_score callback (also saves the best model)
                f1_callback = BertF1Callback(input_ids_train, input_ids_val,
                                       attention_masks_train, attention_masks_val,
                                       input_type_ids_train, input_type_ids_val,
                                       y_train, y_val, self.model_type)
                
                # Rebuild model for every fold
                self.model = self.build_model(self.max_len)
                self.model.fit([input_ids_train, attention_masks_train, input_type_ids_train], y_train,
                              epochs = epochs,
                              batch_size = batch_size,
                              callbacks=[f1_callback])
                
                fold += 1

                
        else:
            tf.keras.backend.clear_session()
            val_texts, val_labels = val_data
            input_ids_val, attention_masks_val, input_type_ids_val, y_val = self.bert_decode(val_texts, val_labels)

            f1_callback = BertF1Callback(input_ids, input_ids_val,
                                       attention_masks, attention_masks_val,
                                       input_type_ids, input_type_ids_val,
                                       y, y_val, self.model_type)

            self.model.fit([input_ids, attention_masks, input_type_ids], y,
                           epochs = epochs,
                           batch_size = batch_size,
                           callbacks = [f1_callback],
                           shuffle = True)   
        
    def bert_decode(self, texts, labels = None):
        input_ids = []
        attention_masks = []
        input_type_ids = []

        for text in texts:
            tokenized_output = self.tokenizer.encode_plus(text, add_special_tokens=True, max_length = self.max_len, pad_to_max_length = True)
            input_ids.append(tokenized_output['input_ids'])
            attention_masks.append(tokenized_output['attention_mask'])
            input_type_ids.append(tokenized_output['token_type_ids'])

        if labels is None:
            return np.array(input_ids), np.array(attention_masks), np.array(input_type_ids)
        
        else:
            labels = np.array(labels)
            labels = tf.keras.utils.to_categorical(labels)
            return np.array(input_ids), np.array(attention_masks), np.array(input_type_ids), labels
            

    def summary(self):
        print(self.model.summary())

    def predict(self, texts):

        if type(texts) == str:
            input_id, attention_masks, input_type_ids = self.bert_decode([texts])

        else:
            input_ids, attention_masks, input_type_ids = self.bert_decode(texts)

        predictions = self.model.predict([input_ids, attention_masks, input_type_ids])
        return predictions




## 3.1 Fit a simple BERT + NN model

In [0]:
text_train, text_test, target_train, target_test = train_test_split(train.text.values,
                                                                    train.target.values, 
                                                                    test_size = 0.1,
                                                                    random_state = 2020)

In [19]:
model = BertModel(max_len = 128, model_type = 'bert_nn', fine_tune=True)
model.summary()

HBox(children=(IntProgress(value=0, description='Downloading', max=231508, style=ProgressStyle(description_wid…


Model: "model"
__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_ids (InputLayer)          [(None, 128)]        0                                            
__________________________________________________________________________________________________
attention_masks (InputLayer)    [(None, 128)]        0                                            
__________________________________________________________________________________________________
input_type_ids (InputLayer)     [(None, 128)]        0                                            
__________________________________________________________________________________________________
tf_bert_model_1 (TFBertModel)   ((None, 128, 768), ( 109482240   input_ids[0][0]                  
                                                                 attention_masks[0][0]       

In [0]:
model.fit(text_train, 
          target_train,
          epochs = 20,
          batch_size = 32, 
          cross_validate = True, 
          val_data = (text_test, target_test))

Train on 5480 samples
Epoch 1/20
Epoch = 1  train_acc = 0.772445  train_f1 = 0.750507 val_acc = 0.779723  val_f1 = 0.759562

Saving Model...
Epoch 2/20
Epoch = 2  train_acc = 0.794891  train_f1 = 0.784046 val_acc = 0.808169  val_f1 = 0.799184

Saving Model...
Epoch 3/20
Epoch = 3  train_acc = 0.807664  train_f1 = 0.799832 val_acc = 0.824216  val_f1 = 0.818069

Saving Model...
Epoch 4/20
Epoch = 4  train_acc = 0.816606  train_f1 = 0.808892 val_acc = 0.828592  val_f1 = 0.821992

Saving Model...
Epoch 5/20
Epoch = 5  train_acc = 0.820255  train_f1 = 0.811594 val_acc = 0.83078  val_f1 = 0.82336

Saving Model...
Epoch 6/20
Epoch = 6  train_acc = 0.823175  train_f1 = 0.816126 val_acc = 0.832969  val_f1 = 0.827032

Saving Model...
Epoch 7/20
Epoch = 7  train_acc = 0.82427  train_f1 = 0.817237 val_acc = 0.835157  val_f1 = 0.82925

Saving Model...
Epoch 8/20
Epoch = 8  train_acc = 0.826825  train_f1 = 0.819867 val_acc = 0.838074  val_f1 = 0.832082

Saving Model...
Epoch 9/20
Epoch = 9  train_ac

## 3.2 Model Interpretation with ELI5 Library
**Findings**
- Words with positive sentiment seem to correlate with Fake disaster category. This makes sense because people view a disaster as negative.
- Certain keywords are a great indicator of real or fake disaster. Most natural disaster keywords identify as "real".

Overall the model is making reasonable predictions based on the text provided. It is hard to distinguish between somebody talking about previous disasters that occured versus current disasters.
- For example, somebody talking about their experience with 9/11. Not a real disaster, but the tweet would be very similar to somebody describing a current plane crash event.

### 3.2.1 Get correct/incorrect predictions from the model

In [0]:
predictions = model.predict(list(text_test))
predictions = np.argmax(predictions, axis=1)

correct_indices = []
incorrect_indices = []

for i in range(len(predictions)):
  if predictions[i] == target_test[i]:
    correct_indices.append(i)
  else:
    incorrect_indices.append(i)

correct_indices = np.array(correct_indices)
incorrect_indices = np.array(incorrect_indices)

### 3.2.2 Take a look at correct classifications

In [99]:
from eli5.lime import TextExplainer

index = np.random.choice(correct_indices)

te = TextExplainer(random_state=42, char_based = False)
te.fit(text_test[index], model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

first responders get int  free on saturday! details @ 
true label = 0


Contribution?,Feature
+0.749,<BIAS>
+0.727,get
+0.631,free
+0.519,first
+0.496,details
+0.287,on
… 2 more positive …,… 2 more positive …
-0.039,saturday details
-0.056,int free
-0.145,free on


In [100]:
index = np.random.choice(correct_indices)

te.fit(text_test[index], model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

meet the man who survived both hiroshima and nagasaki  
true label = 1


Contribution?,Feature
+1.839,nagasaki
+1.249,hiroshima
+0.881,hiroshima and
+0.706,both hiroshima
+0.699,who survived
+0.531,survived
+0.484,survived both
+0.398,man who
… 1 more positive …,… 1 more positive …
-0.200,and nagasaki


In [101]:
index = np.random.choice(correct_indices)

te.fit(text_test[index], model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

mourning notices for stabbing arson victims stir û÷politics of griefûª in israel 
true label = 1


Contribution?,Feature
+1.647,arson
+1.103,victims
+0.949,mourning
+0.552,stabbing
+0.463,israel
+0.435,in
+0.382,politics
+0.300,victims stir
+0.263,in israel
+0.202,stir


### 3.2.3 Take a look at incorrect classifications

In [102]:
index = np.random.choice(incorrect_indices)

te.fit(text_test[index], model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

rt kurtkamka: beautiful desolation. just me a couple of coyotes some lizards and the morning sun. #phoenix #arizû_ 
true label = 1


Contribution?,Feature
+0.632,<BIAS>
+0.449,û_
+0.408,ariz
+0.344,me
+0.264,beautiful
+0.178,rt kurtkamka
+0.174,couple
+0.118,kurtkamka
+0.106,some
+0.064,sun


In [103]:
index = np.random.choice(incorrect_indices)

te.fit(text_test[index], model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

ashley and i on going to hurricane harbor friday. ? 
true label = 1


Contribution?,Feature
+0.955,going
+0.784,to hurricane
+0.634,i
+0.513,to
+0.486,and
+0.479,ashley
+0.457,<BIAS>
+0.343,on
… 1 more positive …,… 1 more positive …
-0.031,hurricane harbor


In [104]:
index = np.random.choice(incorrect_indices)

te.fit(text_test[index], model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

december 2011 court dismissed the group charge of rape victims saying 'getting raped was an occupational hazard...!'  us military = isis!
true label = 0


Contribution?,Feature
+0.795,isis
+0.555,military isis
+0.478,victims
+0.415,december
+0.372,military
+0.350,raped was
+0.287,us military
+0.282,court dismissed
+0.231,getting raped
+0.207,occupational hazard


# 4. Building Universal Sentence Encoder (USE) Models

**NOTES** 
- Fine-tuning entire USE model results in major overfitting. Maybe its better to unfreeze only the top few layers?
- USE embeddings seem to perform better than BERT (roughly 2% better). Will have to perform cross-validation on both models to ensure this is just random chance.

In [0]:
# Custom keras callback so we can measure f1_score
class UseF1Callback(tf.keras.callbacks.Callback):
  def __init__(self, X_train, X_val, y_train, y_val):
    self.X_train = X_train
    self.X_val = X_val
    self.y_train = np.argmax(y_train, axis=1)
    self.y_val = np.argmax(y_val, axis=1)
    self.best_f1_score = 0

  def on_epoch_end(self, epoch, logs = {}):
    train_predictions = self.model.predict(self.X_train)
    train_predictions = np.argmax(train_predictions, axis=1)
    train_acc = accuracy_score(self.y_train, train_predictions)
    train_f1 = f1_score(self.y_train, train_predictions, average='macro')

    val_predictions = self.model.predict(self.X_val)
    val_predictions = np.argmax(val_predictions, axis=1)
    val_acc = accuracy_score(self.y_val, val_predictions)
    val_f1 = f1_score(self.y_val, val_predictions, average='macro')

    print('\nEpoch = {}  train_acc = {:.6}  train_f1 = {:.6} val_acc = {:.6}  val_f1 = {:.6}\n'.format(epoch+1, train_acc, train_f1, val_acc, val_f1))
    
    if val_f1 > self.best_f1_score:
      self.best_f1_score = val_f1
      print("Saving Model...")
      self.model.save_weights('/content/drive/My Drive/real_or_not/useModel_weights.h5')


In [0]:
class UseModel():
  def __init__(self, fine_tune = True, dropout = 0.2):
    self.fine_tune = fine_tune
    self.model = self.build_model(dropout)

  def build_model(self, dropout = 0.2):
    module_url = 'https://tfhub.dev/google/universal-sentence-encoder-large/5'
    use_layer = hub.KerasLayer(module_url, trainable=self.fine_tune, name='USE_embedding')

    model = tf.keras.models.Sequential([
          tf.keras.layers.Input(shape = [], dtype=tf.string),
          use_layer,
          tf.keras.layers.Dense(512,activation='relu'),
          tf.keras.layers.Dense(256,activation='relu'),
          tf.keras.layers.Dropout(dropout),
          tf.keras.layers.Dense(128,activation='relu'),
          tf.keras.layers.Dense(64,activation='relu'),
          tf.keras.layers.Dropout(dropout),
          tf.keras.layers.Dense(32,activation='relu'),
          tf.keras.layers.Dense(2,activation='softmax'),
    ])
    optim = tf.keras.optimizers.Adam(learning_rate = 3e-5)
    model.compile(optimizer = optim, loss = 'categorical_crossentropy', metrics = ['accuracy'])
    return model

  def fit(self, texts, labels, n_split = 4, epochs = 3, batch_size = 16, cross_validate = True, val_data = ()):
    y = tf.keras.utils.to_categorical(labels)
    if cross_validate:
      kfold = KFold(n_splits = n_split, shuffle = True, random_state = 2020)
      fold = 1
      for train_index, val_index in kfold.split(y):
        # Free up some GPU memory so we dont OOM
        tf.keras.backend.clear_session()

        X_train, X_val = texts[train_index], texts[val_index]
        y_train, y_val = y[train_index], y[val_index]

        f1_callback = UseF1Callback(X_train, X_val, y_train, y_val)

        self.model.fit(X_train, y_train,
                       epochs = epochs,
                       batch_size = batch_size,
                       callbacks = [f1_callback],
                       shuffle = True)
        
    else:
      # Free up some GPU memory so we dont OOM
      tf.keras.backend.clear_session()
      
      X_train = texts
      y_train = y
      X_val, y_val = val_data
      y_val = tf.keras.utils.to_categorical(y_val)

      f1_callback = UseF1Callback(X_train, X_val, y_train, y_val)

      self.model.fit(X_train, y_train,
                     epochs = epochs,
                     batch_size = batch_size,
                     callbacks = [f1_callback])
      
  def predict(self, texts):
    if type(texts) == str:
      predictions = self.model.predict([texts])

    else:
      predictions = self.model.predict(texts)

    return predictions
      
  def summary(self):
    print(self.model.summary())
      

### 4.1 Fit USE Model

In [47]:
use_model = UseModel()
use_model.summary()

Model: "sequential"
_________________________________________________________________
Layer (type)                 Output Shape              Param #   
USE_embedding (KerasLayer)   (None, 512)               147354880 
_________________________________________________________________
dense (Dense)                (None, 512)               262656    
_________________________________________________________________
dense_1 (Dense)              (None, 256)               131328    
_________________________________________________________________
dropout (Dropout)            (None, 256)               0         
_________________________________________________________________
dense_2 (Dense)              (None, 128)               32896     
_________________________________________________________________
dense_3 (Dense)              (None, 64)                8256      
_________________________________________________________________
dropout_1 (Dropout)          (None, 64)                0

In [48]:
use_model.fit(text_train, 
          target_train,
          epochs = 20,
          batch_size = 32, 
          cross_validate = False, 
          val_data = (text_test, target_test))

Train on 6851 samples
Epoch 1/20








Epoch = 1  train_acc = 0.856809  train_f1 = 0.852848 val_acc = 0.824147  val_f1 = 0.820939

Saving Model...
Epoch 2/20
Epoch = 2  train_acc = 0.897825  train_f1 = 0.893691 val_acc = 0.826772  val_f1 = 0.821072

Saving Model...
Epoch 3/20
Epoch = 3  train_acc = 0.925558  train_f1 = 0.922939 val_acc = 0.82021  val_f1 = 0.815559

Epoch 4/20
Epoch = 4  train_acc = 0.94395  train_f1 = 0.941965 val_acc = 0.814961  val_f1 = 0.809146

Epoch 5/20
Epoch = 5  train_acc = 0.962049  train_f1 = 0.960943 val_acc = 0.812336  val_f1 = 0.808843

Epoch 6/20
Epoch = 6  train_acc = 0.972413  train_f1 = 0.971726 val_acc = 0.812336  val_f1 = 0.809245

Epoch 7/20
Epoch = 7  train_acc = 0.976354  train_f1 = 0.975819 val_acc = 0.793963  val_f1 = 0.791609

Epoch 8/20
Epoch = 8  train_acc = 0.981317  train_f1 = 0.980884 val_acc = 0.80315  val_f1 = 0.799975

Epoch 9/20
Epoch = 9  train_acc = 0.984966  train_f1 = 0.984621 val_acc = 0.799213  val_f1 = 0.795905

Epoch 10/20
Epoch = 10  train_acc = 0.98555  train_f1 =

### Model Interpretation with ELI5 Library

### Get correct/incorrect predictions from the model

In [0]:
predictions = use_model.predict(list(text_test))
predictions = np.argmax(predictions, axis=1)

correct_indices = []
incorrect_indices = []

for i in range(len(predictions)):
  if predictions[i] == target_test[i]:
    correct_indices.append(i)
  else:
    incorrect_indices.append(i)

correct_indices = np.array(correct_indices)
incorrect_indices = np.array(incorrect_indices)

### Take a look at correct classifications

In [54]:
te = TextExplainer(random_state=42, char_based = False)

index = np.random.choice(correct_indices)

te.fit(text_test[index], use_model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

? 19th day since 17-jul-2015 -- nigeria: suicide bomb attacks killed 64 people; blamed: boko haram [l.a. times/ap] | 
true label = 1


Contribution?,Feature
+2.440,attacks
+2.276,killed
+1.195,boko
+1.151,bomb
+0.895,killed 64
+0.859,blamed
+0.834,suicide
+0.826,blamed boko
+0.794,nigeria suicide
+0.733,nigeria


In [59]:
index = np.random.choice(correct_indices)

te.fit(text_test[index], use_model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

  parents of colorado theater shooting victim fear copycat massacre   #antioch  
true label = 1


Contribution?,Feature
+3.342,massacre
+2.335,shooting
+1.363,theater shooting
+1.302,victim
+1.009,antioch
+0.733,colorado
+0.733,copycat massacre
+0.700,of colorado
+0.642,shooting victim
+0.515,parents of


In [61]:
index = np.random.choice(correct_indices)

te.fit(text_test[index], use_model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

rocky fire in northern california swells to 60000 acres; 12000 evacuated  portland #phoenix #miami #atlanta #casper
true label = 1


Contribution?,Feature
+3.550,evacuated
+2.418,swells
+2.064,fire
+1.590,fire in
+1.420,california swells
+0.937,swells to
+0.893,rocky fire
+0.830,evacuated portland
+0.715,12000 evacuated
+0.714,northern


### Take a look at incorrect classifications

In [62]:
index = np.random.choice(incorrect_indices)

te.fit(text_test[index], use_model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

 my grandfather was set to be in the first groups of marines to hit japan in operation olympic. 95% casualty rate predictions
true label = 1


Contribution?,Feature
+3.340,casualty
+1.530,hit japan
+1.184,was
+1.166,95 casualty
+0.982,marines
+0.803,hit
+0.663,operation
+0.511,in the
+0.402,japan in
+0.306,japan


In [63]:
index = np.random.choice(incorrect_indices)

te.fit(text_test[index], use_model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

why some traffic is freezing cold and some blazing hot ûò and how to heat up some of your traffic 
true label = 0


Contribution?,Feature
+1.395,how to
+1.251,why some
+1.198,blazing hot
+1.155,cold
+1.136,is
+0.944,heat
+0.919,some traffic
+0.860,traffic
+0.813,freezing
+0.761,ûò


In [64]:
index = np.random.choice(incorrect_indices)

te.fit(text_test[index], use_model.predict)
print(text_test[index])
print("true label = {}".format(target_test[index]))
te.show_prediction(target_names=[0, 1], top = (-10,10))

world annihilation vs self transformation  aliens attack to exterminate humans 
true label = 0


Contribution?,Feature
+2.523,aliens attack
+1.703,attack
+1.449,vs self
+1.407,annihilation vs
+1.349,annihilation
+1.096,transformation aliens
+1.018,attack to
+0.870,to exterminate
+0.780,world annihilation
+0.695,exterminate humans
