In [None]:
import numpy as np 
import pandas as pd
from sklearn.model_selection import train_test_split
import matplotlib.pyplot as plt
import seaborn as sns
from sklearn.metrics import accuracy_score, confusion_matrix

In [None]:
# Mount Google Drive inside the Colab runtime and switch the working directory
# to the mount point, so later cells can use relative 'MyDrive/...' paths.
from google.colab import drive
drive.mount('/content/drive')
%cd /content/drive

Mounted at /content/drive
/content/drive


In [None]:
# Load the tweets dataset; the path is relative to the current working
# directory (set to /content/drive by the mount cell).
df = pd.read_csv('MyDrive/tweets.csv')
# Preview the first rows.
df.head()

Unnamed: 0,id,keyword,location,text,target
0,0,ablaze,,"Communal violence in Bhainsa, Telangana. ""Ston...",1
1,1,ablaze,,Telangana: Section 144 has been imposed in Bha...,1
2,2,ablaze,New York City,Arsonist sets cars ablaze at dealership https:...,1
3,3,ablaze,"Morgantown, WV",Arsonist sets cars ablaze at dealership https:...,1
4,4,ablaze,,"""Lord Jesus, your love brings freedom and pard...",0


In [None]:
# (rows, columns) of the frame as loaded from the CSV.
df.shape

(11370, 5)

In [None]:
# Per-column dtype and non-null summary.
df.info()


In [None]:
# Keep only the tweet text and its label. Selecting the wanted columns (rather
# than dropping 'id', 'keyword' and 'location') yields the same frame but lets
# this cell be re-run safely: a second drop() would raise KeyError because the
# columns are already gone.
df = df[['text', 'target']]
df.head()

Unnamed: 0,text,target
0,"Communal violence in Bhainsa, Telangana. ""Ston...",1
1,Telangana: Section 144 has been imposed in Bha...,1
2,Arsonist sets cars ablaze at dealership https:...,1
3,Arsonist sets cars ablaze at dealership https:...,1
4,"""Lord Jesus, your love brings freedom and pard...",0


In [None]:
# Number of rows per label value, before any resampling.
df['target'].value_counts()

0    9256
1    2114
Name: target, dtype: int64

In [None]:
#@title Dealing with imbalanced dataset  
# Label 0 has 9256 rows and label 1 only 2114, so 7142 randomly chosen label-0
# rows are dropped to leave both classes the same size.

df_0_class = df[df['target'] == 0]
df_1_class = df[df['target'] == 1]
# A fixed random_state keeps the same label-0 rows on every run; without it the
# balanced dataset (and every downstream metric) changes between sessions.
df_0_class_undersampled = df_0_class.sample(n=len(df_1_class), random_state=42)
df = pd.concat([df_0_class_undersampled, df_1_class], axis=0)

In [None]:
# Preview the balanced frame; pd.concat was called without ignore_index, so
# rows keep their original index labels.
df.head()

Unnamed: 0,text,target
7384,Grandson of the Terrorist Mastermind of the 19...,0
11229,“One of the penalties of an ecological educati...,0
1417,Some new BODY BAGS + ONLY DEATH grips https://...,0
2328,"2,400 jobs are at stake should the deal fall t...",0
5041,retweet please. May this effort give the way t...,0


In [None]:
# Rows per label value after undersampling.
df['target'].value_counts()

0    2114
1    2114
Name: target, dtype: int64

In [None]:
# Stratified hold-out split of the tweet text and labels (train_test_split is
# already imported in the first cell). random_state is fixed so the test set is
# identical across sessions: a model that is saved and later reloaded must be
# scored on the same held-out rows, never on rows it may have been trained on.
X_train, X_test, y_train, y_test = train_test_split(
    df['text'],
    df['target'],
    stratify=df['target'],
    random_state=42,
)

In [None]:
!pip install tensorflow-text

In [None]:
import tensorflow as tf
import tensorflow_hub as hub
import tensorflow_text as text

In [None]:
# TF Hub handles: the bert_en_uncased text preprocessing model and the
# bert_en_uncased L-12 / H-768 / A-12 encoder.
preprocess_url = "https://tfhub.dev/tensorflow/bert_en_uncased_preprocess/3"
encoder_url = "https://tfhub.dev/tensorflow/bert_en_uncased_L-12_H-768_A-12/4"

# Wrap both as Keras layers so they can be used inside the functional model.
preprocess = hub.KerasLayer(preprocess_url)
encoder = hub.KerasLayer(encoder_url)



In [None]:
# Functional model: raw string -> BERT preprocessing -> BERT encoder ->
# dropout on the pooled output -> single sigmoid unit.
text_input = tf.keras.layers.Input(shape=(), dtype=tf.string, name='text-layer')
preprocessed_text = preprocess(text_input)
outputs = encoder(preprocessed_text)

# Classification head applied to the encoder's 'pooled_output'.
dropout = tf.keras.layers.Dropout(0.1, name="dropout-layer")
classifier = tf.keras.layers.Dense(1, activation='sigmoid', name="output")
d_layer = classifier(dropout(outputs['pooled_output']))

model = tf.keras.Model(inputs=[text_input], outputs=[d_layer])

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


In [None]:
# Print the layer-by-layer structure of the assembled model.
model.summary()

Model: "model"
__________________________________________________________________________________________________
 Layer (type)                   Output Shape         Param #     Connected to                     
 text-layer (InputLayer)        [(None,)]            0           []                               
                                                                                                  
 keras_layer (KerasLayer)       {'input_type_ids':   0           ['text-layer[0][0]']             
                                (None, 128),                                                      
                                 'input_mask': (Non                                               
                                e, 128),                                                          
                                 'input_word_ids':                                                
                                (None, 128)}                                                  

In [None]:
# Metrics reported during training and evaluation: binary accuracy, precision
# and recall (in that order, after the loss).
m = [
    tf.keras.metrics.BinaryAccuracy(name='accuracy'),
    tf.keras.metrics.Precision(name='precision'),
    tf.keras.metrics.Recall(name='recall'),
]
model.compile(
    optimizer='adam',
    loss='binary_crossentropy',
    metrics=m,
)

In [None]:
# Train on the training split for 20 epochs.
model.fit(x=X_train, y=y_train, epochs=20)

Epoch 1/20
Epoch 2/20
Epoch 3/20
Epoch 4/20
Epoch 5/20
Epoch 6/20
Epoch 7/20
Epoch 8/20
Epoch 9/20
Epoch 10/20
Epoch 11/20
Epoch 12/20
Epoch 13/20
Epoch 14/20
Epoch 15/20
Epoch 16/20
Epoch 17/20
Epoch 18/20
Epoch 19/20
Epoch 20/20


<keras.callbacks.History at 0x7fefae59b370>

In [None]:
# Score the trained model on the held-out split; returns the loss followed by
# the compiled metrics.
model.evaluate(x=X_test, y=y_test)



[0.4644382894039154,
 0.8003784418106079,
 0.7835420370101929,
 0.8295454382896423]

In [None]:
import json

In [None]:
# Persist the model in two parts on Drive: the architecture as JSON and the
# weights as an HDF5 file.
model_json = model.to_json()
with open("MyDrive/model.json", "w") as architecture_file:
    architecture_file.write(model_json)

model.save_weights("MyDrive/model.h5")
print("Saved model to drive")

Saved model to drive


In [None]:
#@title Testing the saved model 
from tensorflow.keras.models import model_from_json
from tensorflow_hub import KerasLayer
import json

In [None]:
# Rebuild the architecture from the saved JSON. The hub KerasLayer class is
# passed as a custom object so the deserializer can resolve it.
with open('MyDrive/model.json', 'r') as json_file:
    model_json = json_file.read()

custom_objects = {"KerasLayer": KerasLayer}
model = tf.keras.models.model_from_json(
    model_json,
    custom_objects=custom_objects,
)

# Restore the trained weights into the rebuilt architecture.
model.load_weights('MyDrive/model.h5')
print("Loaded model from disk")

Instructions for updating:
Lambda fuctions will be no more assumed to be used in the statement where they are used, or at least in the same block. https://github.com/tensorflow/tensorflow/issues/56089


Loaded model from disk


In [None]:
# Inspect the held-out tweets (the 'text' column of the test split).
X_test

10803    Biden, Buttigieg and Sanders supporters have b...
3831                     love you so much it makes me sick
10274    Preliminary tornado count up to 7 from last Fr...
164      ?!?!?! SO THEY SAYING SHOOTING DOWN THAT AIRPL...
2037     #Update: No casualties have been found and the...
                               ...                        
5067     More than 23,000 people have been evacuated an...
6404                                  i used to be cool :(
10945    Ruh Roh [AS] ([AS]S)chiffty comes under fire f...
9637     If I’m going, it’ll have to be in my Coachella...
3711     Hi there, I'm very sorry to hear this, we are ...
Name: text, Length: 1057, dtype: object

In [None]:
# Run the reloaded model on the test tweets and flatten the predictions to a
# 1-D array of sigmoid outputs.
y_predicted = model.predict(X_test).flatten()




array([0, 0, 1, ..., 0, 0, 1])

In [None]:
# Convert sigmoid outputs to hard labels: 1 when strictly above 0.5, else 0.
y_predicted = (y_predicted > 0.5).astype(int)
y_predicted

In [None]:
# Positional list of the test tweets, so they can be paired with predictions
# by position instead of by the Series' original index labels.
tweets = list(X_test)

In [None]:
# Plain Python list of predicted labels, followed by a sanity check that there
# is exactly one prediction per test tweet.
y_pred = y_predicted.tolist()
len(X_test) == len(y_pred)

True

In [None]:
# Print every test tweet whose predicted label is 1.
for label, tweet in zip(y_pred, tweets):
  if label == 1:
    print(" The disaster tweet is : " + tweet)

 The disaster tweet is : Preliminary tornado count up to 7 from last Friday's storms https://t.co/GHsW2dT6pb
 The disaster tweet is : #Update: No casualties have been found and the fire has been put out, according to local authorities. The cause of… https://t.co/130QV9UDSC
 The disaster tweet is : BREAKING: Huge sinkhole swallows bus in northwest China; at least 6 dead, 16 injured, and 4 missing https://t.co/m12xBPxruu
 The disaster tweet is : Drones show Philippines town cloaked in ash from Taal volcano – video https://t.co/rkGl8v4vkN https://t.co/75FXWd9qUt
 The disaster tweet is : ah shit. state capture commission blue lights and sirens are back 😣
 The disaster tweet is : Train derailment causes pileup in Dunn County https://t.co/P0iKOyBFAP
 The disaster tweet is : STORM DAMAGE: A Madison family is looking forward to rebuilding after a tree at least 100 feet tall ripped their home apart.​ htt…
 The disaster tweet is : Taal volcano news – live: Hundreds of thousands urged to flee pos

In [None]:
# Confusion matrix of true labels vs. thresholded predictions
# (rows: true class, columns: predicted class).
matrix = confusion_matrix(y_true=y_test, y_pred=y_predicted)
matrix

array([[381, 148],
       [ 91, 437]])

In [None]:

# Compile the reloaded model with a loss and an accuracy metric, then score it
# on the test split and report accuracy as a percentage.
model.compile(loss='binary_crossentropy', optimizer='rmsprop', metrics=['accuracy'])
score = model.evaluate(X_test, y_test, verbose=0)
metric_name, metric_value = model.metrics_names[1], score[1]
print(f"{metric_name}: {metric_value * 100:.2f}%")

accuracy: 77.39%
