In [2]:
import numpy as np
import tensorflow as tf
from keras.layers import Dense, Activation, LSTM, Bidirectional, Input, Concatenate, Dot, RepeatVector
from keras.models import Model
from keras.optimizers import Adam
from att_utils import *
from keras.utils import to_categorical
import pandas as pd

In [17]:
df=pd.read_csv("train.csv");
data, label=read_data(df[0:2000]);
max_length, words_set, vocabulary=preprocess_data(data)

In [18]:
Tx=max_length;
Tx

31

In [19]:
Ty=1

In [20]:
X_index=data_to_index(data, vocabulary, words_set, Tx)

In [21]:
X_hot=one_hot(X_index, len(vocabulary))

In [22]:
y_hot=to_categorical(label, num_classes=2)

In [23]:
y_hot.shape

(2000, 2)

In [24]:
repeator=RepeatVector(Tx)
concatenator=Concatenate(axis=-1)
dotor=Dot(axes=1)

In [25]:
def one_step_attention(s_prev, a):
    
    s_prev=repeator(s_prev)
    
    concat=concatenator([s_prev, a])
    
    e=Dense(units=32, activation='tanh')(concat)
    
    e=Dense(units=16, activation='relu')(e)
    
    alphas=Activation("softmax")(e)
    
    context=dotor([alphas, a])
    
    return context   

In [26]:
n_s=64
LSTM_cell=LSTM(units=n_s, return_state=True)

In [27]:
def model(Tx, n_a, n_s, vocabulary_size):
    
    X_input=Input(shape=(Tx, vocabulary_size))
    
    a=Bidirectional(LSTM(units=n_a, return_sequences=True))(X_input)
    
    
    s0=Input(shape=(n_s,))
    c0=Input(shape=(n_s,))
    
        
    context=one_step_attention(s0, a)
        
    s,_,c= LSTM_cell(inputs=context, initial_state=[s0,c0])
        
    out= Dense(units=2, activation='sigmoid')(s)
    
    model=Model(inputs=[X_input, s0, c0], outputs= out)
    
    return model

In [28]:
model=model(Tx=31, n_a=32, n_s=64, vocabulary_size=len(vocabulary))

In [29]:
model.summary()

__________________________________________________________________________________________________
Layer (type)                    Output Shape         Param #     Connected to                     
input_5 (InputLayer)            (None, 64)           0                                            
__________________________________________________________________________________________________
input_4 (InputLayer)            (None, 31, 8235)     0                                            
__________________________________________________________________________________________________
repeat_vector_2 (RepeatVector)  (None, 31, 64)       0           input_5[0][0]                    
__________________________________________________________________________________________________
bidirectional_2 (Bidirectional) (None, 31, 64)       2116608     input_4[0][0]                    
__________________________________________________________________________________________________
concatenat

In [30]:
opt=Adam(lr=0.01, beta_1=0.9, beta_2=0.999, decay= 0.01)

In [31]:
model.compile(optimizer=opt, loss='categorical_crossentropy', metrics=['accuracy'])

In [32]:
m=X_hot.shape[0]
n_s=64
s0=np.zeros((m,n_s))
c0=np.zeros((m, n_s))

model.fit([X_hot, s0, c0], y_hot, epochs=5, batch_size= 32)

Epoch 1/5
Epoch 2/5
Epoch 3/5
Epoch 4/5
Epoch 5/5


<keras.callbacks.History at 0x25b257c1160>

###  Test set

In [33]:
df1=pd.read_csv("test.csv")[0:500]

test_data=[]
m1=df1.shape[0]
    
for i in range(m1):
    
    curr_sentence=df1['text'][i].lower().strip()
    
    curr_sentence=re.sub("[|#@!*.[\]_/{}();+:?%'\']",'',curr_sentence)
    
    test_data.append(curr_sentence)
    
test_data_index=data_to_index(test_data, vocabulary, words_set, Tx)    

In [34]:
X_test_hot=one_hot(test_data_index, len(vocabulary))

In [35]:
loss, acc=model.evaluate([X_hot,s0,c0], y_hot, batch_size=32)
print("loss:%f"%loss)
print('acc: %f'% acc)

loss:0.049211
acc: 0.982500


In [36]:
s0=np.zeros((m1,n_s))
c0=np.zeros((m1, n_s))
pred= model.predict([X_test_hot, s0, c0])

In [37]:
predict=[]
for i in range(pred.shape[0]):
    predict.append(pred[i].argmax())

In [38]:
predict          ## 1= disaster   0= not 

[1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 0,
 0,
 1,
 0,
 0,
 0,
 1,
 0,
 1,
 1,
 1,
 0,
 1,
 0,
 0,
 0,
 0,
 1,
 1,
 0,
 0,
 1,
 0,
 1,
 1,
 0,
 0,
 1,
 1,
 1,
 1,
 1,
 0,
 1,
 1,
 1,
 1,
 1,
 1,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 0,
 1,
 0,
 0,
 0,
 0,
 0,
 1,
