In [1]:
import keras
import tensorflow as tf
import pandas as pd
import numpy as np
import matplotlib.pyplot as plt
from tensorflow.keras.layers import TextVectorization,Embedding

In [2]:
train= pd.read_csv('train.csv')
test= pd.read_csv('test.csv')
train.shape

(7613, 5)

In [3]:
train.target.unique()

array([1, 0])

In [4]:
train

Unnamed: 0,id,keyword,location,text,target
0,1,,,Our Deeds are the Reason of this #earthquake M...,1
1,4,,,Forest fire near La Ronge Sask. Canada,1
2,5,,,All residents asked to 'shelter in place' are ...,1
3,6,,,"13,000 people receive #wildfires evacuation or...",1
4,7,,,Just got sent this photo from Ruby #Alaska as ...,1
...,...,...,...,...,...
7608,10869,,,Two giant cranes holding a bridge collapse int...,1
7609,10870,,,@aria_ahrary @TheTawniest The out of control w...,1
7610,10871,,,M1.94 [01:04 UTC]?5km S of Volcano Hawaii. htt...,1
7611,10872,,,Police investigating after an e-bike collided ...,1


In [5]:
test.head()

Unnamed: 0,id,keyword,location,text
0,0,,,Just happened a terrible car crash
1,2,,,"Heard about #earthquake is different cities, s..."
2,3,,,"there is a forest fire at spot pond, geese are..."
3,9,,,Apocalypse lighting. #Spokane #wildfires
4,11,,,Typhoon Soudelor kills 28 in China and Taiwan


In [6]:
train_shuffle= train.sample(frac=1,random_state=45)

In [7]:
train_shuffle.head()

Unnamed: 0,id,keyword,location,text,target
6761,9687,tornado,,Heather Night and Ava Sparxxx enjoy a wild tee...,0
7196,10308,weapon,,Pulse rifles after weapon tuning? http://t.co/...,0
1505,2173,catastrophic,"Quito, Ecuador.",Learning from the Legacy of a Catastrophic Eru...,1
6702,9600,thunder,"Macon, GA",#thunder outside my house this afternoon #gawx...,1
3028,4349,earthquake,Earth,1.43 earthquake occurred near Mount St. Helens...,1


In [8]:
from sklearn.model_selection import train_test_split

In [9]:
train_sent,val_sent,train_lab,val_lab= train_test_split(train_shuffle['text'].to_numpy(),
                                                        train_shuffle['target'].to_numpy(),test_size=0.1,random_state=45)

In [10]:
train_sent.shape,val_sent.shape,train_lab.shape,val_lab.shape

((6851,), (762,), (6851,), (762,))

In [11]:
#### TextVectorization
text_vec= TextVectorization(max_tokens=10000,
                            standardize='lower_and_strip_punctuation',
                            split='whitespace', output_mode='int',
                            output_sequence_length=15)

In [12]:
text_vec.adapt(train_sent)

In [13]:
sentence='There is flood in the city and cause more damage'

In [14]:
text_vec(sentence)

<tf.Tensor: shape=(15,), dtype=int64, numpy=
array([ 74,   9, 224,   5,   2, 195,   7, 340,  52, 235,   0,   0,   0,
         0,   0])>

In [15]:
#### Embedding
emb= Embedding(input_dim=10000,
              output_dim=128,input_length=15,
              embeddings_initializer="uniform")



In [16]:
emb(text_vec(sentence))

<tf.Tensor: shape=(15, 128), dtype=float32, numpy=
array([[ 0.03152383,  0.00749383,  0.00647137, ...,  0.01070918,
        -0.03642927,  0.03244026],
       [-0.00839276, -0.03746588,  0.04029154, ..., -0.04975302,
         0.01504174, -0.02045944],
       [-0.0313068 , -0.03982543, -0.04656764, ..., -0.02302442,
        -0.01523442,  0.04631789],
       ...,
       [ 0.0301428 , -0.01898012,  0.00104686, ...,  0.04337234,
         0.005689  , -0.00350205],
       [ 0.0301428 , -0.01898012,  0.00104686, ...,  0.04337234,
         0.005689  , -0.00350205],
       [ 0.0301428 , -0.01898012,  0.00104686, ...,  0.04337234,
         0.005689  , -0.00350205]], dtype=float32)>

RNN

In [17]:
rnn= tf.keras.models.Sequential()
rnn.add(tf.keras.Input(shape=(1,),dtype=tf.string))
rnn.add(text_vec)
rnn.add(emb)
rnn.add(tf.keras.layers.SimpleRNN(units=80,return_sequences=True))
rnn.add(tf.keras.layers.SimpleRNN(units=80))
rnn.add(tf.keras.layers.Dense(units=80,activation='relu'))
rnn.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))
rnn.summary()

In [18]:
rnn.compile(optimizer=tf.keras.optimizers.Adam(),loss= tf.keras.losses.binary_crossentropy,metrics=['accuracy'])

In [19]:
history= rnn.fit(train_sent,train_lab,epochs=10,batch_size=30,validation_data=(val_sent,val_lab))

Epoch 1/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 31ms/step - accuracy: 0.6799 - loss: 0.5884 - val_accuracy: 0.8018 - val_loss: 0.4554
Epoch 2/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - accuracy: 0.8889 - loss: 0.2965 - val_accuracy: 0.7795 - val_loss: 0.5441
Epoch 3/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 26ms/step - accuracy: 0.9485 - loss: 0.1436 - val_accuracy: 0.7795 - val_loss: 0.6786
Epoch 4/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - accuracy: 0.9674 - loss: 0.0840 - val_accuracy: 0.7625 - val_loss: 0.8092
Epoch 5/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m8s[0m 33ms/step - accuracy: 0.9744 - loss: 0.0533 - val_accuracy: 0.7769 - val_loss: 0.8588
Epoch 6/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m5s[0m 23ms/step - accuracy: 0.9764 - loss: 0.0517 - val_accuracy: 0.7402 - val_loss: 0.8728
Epoch 7/10
[1m229/2

In [33]:
pred= rnn.predict(test['text'].to_numpy())
pred1=tf.squeeze(np.round(pred))

[1m102/102[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 5ms/step


In [43]:
pred= rnn.predict(val_sent)
pred1=tf.squeeze(np.round(pred))
pred1

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 28ms/step


<tf.Tensor: shape=(762,), dtype=float32, numpy=
array([1., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 1., 1., 0., 0.,
       1., 0., 0., 0., 1., 0., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0.,
       0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0.,
       1., 1., 1., 1., 0., 1., 1., 0., 1., 0., 1., 1., 0., 0., 0., 1., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 0., 1., 0.,
       0., 1., 0., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 0., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 0.,
       1., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0., 0., 1., 1., 1., 0.,
       0., 1., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 0., 1.,
       0., 0., 1., 1., 0., 0., 1., 0., 0., 1., 0., 0

In [44]:
from sklearn.metrics import accuracy_score,precision_recall_fscore_support

In [46]:
def calculate_results(true,pred):
  accuracy= accuracy_score(true,pred)
  precision,recall,f1,_= precision_recall_fscore_support(true,pred,average='weighted')
  model_results= {'accuracy':accuracy,
                  'precision':precision,
                  'recall':recall,
                  'f1_score':f1}
  return model_results

In [55]:
precision_recall_fscore_support(val_lab,pred1)

(array([0.75884956, 0.70645161]),
 array([0.79032258, 0.66768293]),
 array([0.77426637, 0.68652038]),
 array([434, 328]))

In [47]:
calculate_results(val_lab,pred1)

{'accuracy': 0.7375328083989501,
 'precision': 0.736295061675669,
 'recall': 0.7375328083989501,
 'f1_score': 0.736496438444073}

LSTM

In [49]:
lstm= tf.keras.models.Sequential()
lstm.add(tf.keras.Input(shape=(1,),dtype=tf.string))
lstm.add(text_vec)
lstm.add(emb)
lstm.add(tf.keras.layers.LSTM(units=80,return_sequences=True))
lstm.add(tf.keras.layers.LSTM(units=80))
lstm.add(tf.keras.layers.Dense(units=80,activation='relu'))
lstm.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))
lstm.summary()

In [50]:
lstm.compile(optimizer=tf.keras.optimizers.Adam(),loss= tf.keras.losses.binary_crossentropy,metrics=['accuracy'])

In [52]:
history= lstm.fit(train_sent,train_lab,epochs=10,batch_size=30,validation_data=(val_sent,val_lab))

Epoch 1/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m14s[0m 44ms/step - accuracy: 0.8674 - loss: 0.3618 - val_accuracy: 0.7992 - val_loss: 0.6013
Epoch 2/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 39ms/step - accuracy: 0.9489 - loss: 0.1590 - val_accuracy: 0.7572 - val_loss: 0.6169
Epoch 3/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 40ms/step - accuracy: 0.9598 - loss: 0.1213 - val_accuracy: 0.7612 - val_loss: 0.8849
Epoch 4/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 45ms/step - accuracy: 0.9692 - loss: 0.0877 - val_accuracy: 0.7664 - val_loss: 1.1357
Epoch 5/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m12s[0m 50ms/step - accuracy: 0.9761 - loss: 0.0580 - val_accuracy: 0.7480 - val_loss: 1.3413
Epoch 6/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m11s[0m 46ms/step - accuracy: 0.9810 - loss: 0.0445 - val_accuracy: 0.7651 - val_loss: 1.2141
Epoch 7/10
[1m229

In [57]:
pred= lstm.predict(val_sent)
pred2=tf.squeeze(np.round(pred))
pred2

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m1s[0m 25ms/step


<tf.Tensor: shape=(762,), dtype=float32, numpy=
array([1., 0., 1., 0., 1., 1., 1., 0., 1., 1., 0., 0., 0., 1., 0., 1., 0.,
       1., 0., 1., 1., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0.,
       0., 1., 0., 1., 0., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 1., 0.,
       0., 0., 0., 1., 0., 1., 0., 1., 0., 1., 1., 0., 0., 1., 1., 1., 0.,
       1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 0., 0., 1., 1.,
       0., 0., 0., 0., 0., 0., 0., 0., 1., 1., 1., 0., 1., 1., 1., 1., 0.,
       0., 1., 0., 1., 0., 0., 0., 1., 1., 1., 1., 0., 0., 0., 0., 0., 1.,
       0., 1., 1., 0., 0., 1., 1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 0.,
       1., 1., 0., 1., 0., 1., 0., 0., 0., 1., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 0., 0., 0., 1., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 0.,
       0., 0., 1., 0., 0., 0., 0., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0.,
       1., 1., 0., 0., 1., 0., 0., 0., 0., 1., 0., 0., 0., 1., 0., 1., 1.,
       0., 0., 0., 1., 0., 1., 1., 0., 1., 1., 1., 0

In [58]:
calculate_results(val_lab,pred2)

{'accuracy': 0.7506561679790026,
 'precision': 0.7525729056873343,
 'recall': 0.7506561679790026,
 'f1_score': 0.7512832902929388}

GRU

In [59]:
gru= tf.keras.models.Sequential()
gru.add(tf.keras.Input(shape=(1,),dtype=tf.string))
gru.add(text_vec)
gru.add(emb)
gru.add(tf.keras.layers.GRU(units=80,return_sequences=True))
gru.add(tf.keras.layers.GRU(units=80))
gru.add(tf.keras.layers.Dense(units=80,activation='relu'))
gru.add(tf.keras.layers.Dense(units=1,activation='sigmoid'))
gru.summary()

In [60]:
gru.compile(optimizer=tf.keras.optimizers.Adam(),loss= tf.keras.losses.binary_crossentropy,metrics=['accuracy'])

In [61]:
history= gru.fit(train_sent,train_lab,epochs=10,batch_size=30,validation_data=(val_sent,val_lab))

Epoch 1/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m15s[0m 42ms/step - accuracy: 0.8681 - loss: 0.2797 - val_accuracy: 0.7756 - val_loss: 0.8080
Epoch 2/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - accuracy: 0.9719 - loss: 0.0656 - val_accuracy: 0.7402 - val_loss: 1.3421
Epoch 3/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - accuracy: 0.9741 - loss: 0.0641 - val_accuracy: 0.7572 - val_loss: 1.3947
Epoch 4/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 38ms/step - accuracy: 0.9771 - loss: 0.0444 - val_accuracy: 0.7559 - val_loss: 1.3928
Epoch 5/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m9s[0m 41ms/step - accuracy: 0.9796 - loss: 0.0453 - val_accuracy: 0.7493 - val_loss: 1.4620
Epoch 6/10
[1m229/229[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m10s[0m 42ms/step - accuracy: 0.9830 - loss: 0.0372 - val_accuracy: 0.7559 - val_loss: 1.5251
Epoch 7/10
[1m229/2

In [63]:
pred= lstm.predict(val_sent)
pred3=tf.squeeze(np.round(pred))

[1m24/24[0m [32m━━━━━━━━━━━━━━━━━━━━[0m[37m[0m [1m0s[0m 10ms/step


In [64]:
calculate_results(val_lab,pred3)

{'accuracy': 0.7545931758530183,
 'precision': 0.7543345710548723,
 'recall': 0.7545931758530183,
 'f1_score': 0.7544520836379902}