In [120]:
import numpy as np
import matplotlib.pyplot as plt
import pandas as pd
import seaborn as sns
import tensorflow as tf
from tensorflow import keras
from tensorflow.keras import layers
from tensorflow.keras.layers.experimental import preprocessing
from sklearn.model_selection import train_test_split



In [121]:
# Load data
df = pd.read_csv('data/data.csv')

In [122]:
# plot a cofusion matrix
def plot_confusion_matrix(cm, classes,
                          normalize=False,
                          title='Confusion matrix',
                          cmap=plt.cm.Blues):
    """
    This function prints and plots the confusion matrix.
    Normalization can be applied by setting `normalize=True`.
    """
    if normalize:
        cm = cm.astype('float') / cm.sum(axis=1)[:, np.newaxis]
        print("Normalized confusion matrix")
    else:
        print('Confusion matrix, without normalization')

    print(cm)

    plt.imshow(cm, interpolation='nearest', cmap=cmap)
    plt.title(title)
    plt.colorbar()
    tick_marks = np.arange(len(classes))
    plt.xticks(tick_marks, classes, rotation=45)
    plt.yticks(tick_marks, classes)

    fmt = '.2f' if normalize else 'd'
    thresh = cm.max() / 2.
    for i, j in itertools.product(range(cm.shape[0]), range(cm.shape[1])):
        plt.text(j, i, format(cm[i, j], fmt),
                 horizontalalignment="center",
                 color="white" if cm[i, j] > thresh else "black")

    plt.tight_layout()
    plt.ylabel('True label')
    plt.xlabel('Predicted label')

In [123]:
df.head()

Unnamed: 0,Text,Polarity
0,আমাদের সজাগ থাকতে হবে টিকা নেওয়া নিয়ে।,1
1,ভ্যাকসিন ভালো না।,0
2,আমার পরিবারের সকলেই টিকা নিয়েছে।,1
3,টিকা লক্ষণীয় রোগ প্রতিরোধে একইভাবে উচ্চ কার্য...,1
4,অনলাইনে আবেদন করে আমি ভ্যাকসিন নিয়েছি।,1


In [124]:
# train test and validation split
train_val, test = train_test_split(df[['Text','Polarity']].to_numpy(), test_size=0.2)
train, val = train_test_split(train_val, test_size=0.2)










In [125]:
train_sentence,train_label = train[:,0], train[:,1]
train_label = train_label.astype('int')
test_sentence,test_label = test[:,0], test[:,1]
test_label = test_label.astype('int')
val_sentence,val_label = val[:,0], val[:,1]
val_label = val_label.astype('int')




In [126]:
train_sentence.shape,train_label.shape,test_sentence.shape,test_label.shape,val_sentence.shape,val_label.shape

((2436,), (2436,), (762,), (762,), (609,), (609,))

In [127]:
from keras.layers.preprocessing.text_vectorization import TextVectorization

In [128]:
max_voc_size = 10000
max_len = np.round(np.average([len(word.split(' ')) for word in test_sentence]))



tex_vec = TextVectorization(
    max_tokens = max_voc_size,
    output_mode='int',
    output_sequence_length=int(max_len)
)

In [129]:
tex_vec.adapt(train_sentence)

tex_vec([train_sentence[0]])

<tf.Tensor: shape=(1, 11), dtype=int64, numpy=array([[4148,   47,  524,   62, 2234,   58,    3,   34,   21,  187,   12]])>

In [130]:
# build a LSTM
from keras.layers import Embedding

In [131]:
embedding= Embedding(input_dim=max_voc_size,output_dim=128,input_length=max_len)

In [132]:
# build a LSTM model

input = layers.Input(shape=(1,), dtype='string')
x = tex_vec(input)
x= embedding(x)
x = layers.LSTM(64)(x)
output = layers.Dense(1,activation='sigmoid')(x)
model_lstm = keras.Model(input,output,)

In [133]:
model_lstm.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])

In [134]:
model_lstm.summary()

Model: "model_5"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_6 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_5 (TextV  (None, 11)               0         
 ectorization)                                                   
                                                                 
 embedding_5 (Embedding)     (None, 11, 128)           1280000   
                                                                 
 lstm_5 (LSTM)               (None, 64)                49408     
                                                                 
 dense_5 (Dense)             (None, 1)                 65        
                                                                 
Total params: 1,329,473
Trainable params: 1,329,473
Non-trainable params: 0
_________________________________________________

In [135]:
from keras.callbacks import EarlyStopping,ReduceLROnPlateau

earlystoper = EarlyStopping(patience=3, restore_best_weights=True)

In [136]:
model_lstm_history = model_lstm.fit(
    train_sentence,
    train_label,
    epochs=10,
    validation_data = (val_sentence,val_label),
    callbacks=[earlystoper]
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [137]:
model_lstm_pred_prob= model_lstm.predict(test_sentence)

pred_lstm = tf.squeeze(tf.round(model_lstm_pred_prob))



array([1., 1., 1., 0., 1., 1., 0., 1., 1., 1., 1., 0., 1., 1., 1., 1., 1.,
       0., 0., 1., 1., 1., 0., 0., 1., 0., 1., 0., 1., 1., 1., 0., 1., 0.,
       1., 0., 0., 1., 1., 0., 1., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1.,
       0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0., 0., 1., 1.,
       1., 0., 1., 0., 1., 0., 1., 0., 0., 1., 1., 0., 1., 1., 0., 0., 0.,
       1., 0., 1., 1., 0., 1., 1., 1., 0., 0., 1., 1., 1., 1., 1., 0., 1.,
       1., 1., 0., 0., 0., 1., 0., 0., 0., 0., 0., 0., 0., 0., 1., 0., 0.,
       1., 0., 0., 0., 1., 1., 1., 1., 1., 1., 1., 0., 1., 0., 1., 1., 1.,
       1., 1., 1., 0., 0., 1., 1., 0., 1., 0., 0., 0., 0., 1., 1., 0., 0.,
       1., 1., 1., 1., 0., 1., 1., 1., 1., 1., 1., 0., 0., 1., 0., 1., 1.,
       1., 1., 0., 0., 0., 0., 1., 1., 1., 1., 1., 0., 0., 0., 0., 1., 1.,
       0., 0., 0., 1., 0., 0., 1., 0., 1., 0., 0., 1., 1., 1., 0., 0., 0.,
       0., 0., 1., 1., 1., 1., 1., 0., 1., 1., 0., 0., 1., 0., 1., 1., 1.,
       1., 0., 0., 1., 0.

In [139]:
from sklearn.metrics import accuracy_score

In [140]:
accuracy_score(test_label,pred_lstm)

0.7716535433070866

In [141]:
from keras.utils import plot_model

In [142]:
plot_model(model_lstm, to_file='model_lstm.png', show_shapes=True, show_layer_names=True)

You must install pydot (`pip install pydot`) and install graphviz (see instructions at https://graphviz.gitlab.io/download/) for plot_model to work.


In [143]:
# Build with GRU

input = layers.Input(shape=(1,), dtype='string')
x = tex_vec(input)
x= embedding(x)
x = layers.GRU(64)(x)
output = layers.Dense(1,activation='sigmoid')(x)
model_gru = keras.Model(input,output,)
model_gru.compile(loss='binary_crossentropy',optimizer='adam',metrics=['accuracy'])
model_gru.summary()

Model: "model_6"
_________________________________________________________________
 Layer (type)                Output Shape              Param #   
 input_7 (InputLayer)        [(None, 1)]               0         
                                                                 
 text_vectorization_5 (TextV  (None, 11)               0         
 ectorization)                                                   
                                                                 
 embedding_5 (Embedding)     (None, 11, 128)           1280000   
                                                                 
 gru (GRU)                   (None, 64)                37248     
                                                                 
 dense_6 (Dense)             (None, 1)                 65        
                                                                 
Total params: 1,317,313
Trainable params: 1,317,313
Non-trainable params: 0
_________________________________________________

In [144]:
# train the model
model_gru_history = model_gru.fit(
    train_sentence,
    train_label,
    epochs=10,
    validation_data = (val_sentence,val_label),
    callbacks=[earlystoper]
)


Epoch 1/10
Epoch 2/10
Epoch 3/10
Epoch 4/10
Epoch 5/10


In [146]:
# predict
model_gru_pred_prob= model_gru.predict(test_sentence)

pred_gru = tf.squeeze(tf.round(model_gru_pred_prob))

# accuracy
accuracy_score(test_label,pred_gru)




0.7624671916010499

In [147]:
model_gru.save('./model_gru/')



INFO:tensorflow:Assets written to: ./model_gru/assets


INFO:tensorflow:Assets written to: ./model_gru/assets
