In [1]:
#import Library
import pickle as pkl
import numpy as np
import matplotlib.pyplot as plt
import math
from sklearn.model_selection import train_test_split
from sklearn.metrics import accuracy_score,confusion_matrix,classification_report
import tensorflow as tf

#Data read

In [2]:
#Mount Google Drive at /content/gdrive; the pickled files read in the following cell live under that mount point
from google.colab import drive
drive.mount('/content/gdrive')

Mounted at /content/gdrive


In [3]:
#Load the pre-trained embedding matrices, padded features, labels and vocabulary info from Google Drive.
#NOTE(review): pickle.load can execute arbitrary code while unpickling; only point this at files you produced yourself.
def _load_pickle(file_path):
  """Return the object stored in the pickle file at file_path."""
  with open(file_path,'rb') as f:
    return pkl.load(f)

feature_dir = '/content/gdrive/My Drive/Abusive_text_detection/Feature_extraction/'
deep_learning_dir = feature_dir+'Extracted_feature_deep_learning/'
path = deep_learning_dir+'Embedding_matrix/embedding_matrix_using_'
end_path = '_deep_learning.pkl'

#'trainable_word2vec' is never loaded from disk, so it is dropped from middle_path here.
#This keeps middle_path[i] and embedding_matrix[i] describing the same embedding, which the
#training and plotting cells rely on when they index both lists with the same i.
middle_path = ['trainable_word2vec','glove_twitter_25','glove_twitter_50','glove_twitter_100','glove_twitter_200','fasttext_wiki_news_subwords_300']
middle_path = [name for name in middle_path if name != 'trainable_word2vec']

#Load one embedding matrix per remaining name, in the same order as middle_path
embedding_matrix = [_load_pickle(path+name+end_path) for name in middle_path]

#Load training feature from google drive
training_padded = _load_pickle(deep_learning_dir+'training_padded.pkl')

#Load testing feature from google drive
testing_padded = _load_pickle(deep_learning_dir+'testing_padded.pkl')

#Load train label from google drive
train_level = _load_pickle(feature_dir+'Label/train_label.pkl')

#Load test label from google drive
test_level = _load_pickle(feature_dir+'Label/test_label.pkl')

#Load vocabulary size and maximum sequence length from google drive
vocab_size,max_length = _load_pickle(deep_learning_dir+'vocab_size_and_max_length.pkl')

In [4]:
#Hold out 30% of the training data as a validation set (fixed seed, so the split is repeatable).
#Caution: the inputs are overwritten by the result, so running this cell a second time without
#reloading the data splits the already-reduced training set again.
training_padded, valid_padded, train_level, valid_level = train_test_split(
    training_padded,
    train_level,
    test_size=0.3,
    random_state=0,
)

#Plot accuracy graph

In [5]:
#Plot Accuracy Graph
def plot_graph(model_name,single_direction_accuracy,Bi_direction_accuracy,xlevel):
  """Show a grouped bar chart of single- vs bi-directional accuracy per embedding.

  Parameters
  ----------
  model_name : str
      Name inserted into the chart title.
  single_direction_accuracy : sequence of numbers
      One bar height per entry of xlevel (blue bars).
  Bi_direction_accuracy : sequence of numbers
      One bar height per entry of xlevel (orange bars).
  xlevel : sequence of str
      Tick labels. Underscores are displayed as hyphens; the sequence passed
      in is left untouched.

  The y axis is labelled 'Accuracy in %' and each bar is annotated with its
  height followed by '%', so the values should be passed as percentages.

  Raises
  ------
  ValueError
      If the sequences are empty or do not all have the same length.
  """
  #Build display labels in a new list so the caller's list is not modified
  labels = []
  for name in xlevel:
    label = name.replace("_", "-")
    if label == 'fasttext-wiki-news-subwords-300':
      #Break the longest label over two lines so neighbouring ticks do not overlap
      label = 'fasttext-wiki-news-\nsubwords-300'
    labels.append(label)

  yvals = list(single_direction_accuracy)
  zvals = list(Bi_direction_accuracy)
  if not labels or not (len(labels) == len(yvals) == len(zvals)):
    raise ValueError(
        'plot_graph needs the same non-zero number of labels and accuracies, got '
        '{} labels, {} single-direction and {} bi-direction values'.format(len(labels), len(yvals), len(zvals)))

  ind = np.arange(len(labels))  # the x locations for the groups
  width = 0.30        # the width of the bars

  #Size the figure when it is created; changing rcParams afterwards would only affect later figures
  fig, ax = plt.subplots(figsize=(15,8))

  rects1 = ax.bar(ind, yvals, width, color='b', label='Single Direction')
  rects2 = ax.bar(ind+width, zvals, width, color='orange', label='Bi-Direction')

  #Zoom the y axis around the data of BOTH series, with half the data range as margin.
  #The lower bound is rounded down and the upper bound up so no bar top is cut off.
  low = min(yvals+zvals)
  high = max(yvals+zvals)
  margin = 0.5*(high-low) or 1  # fall back to a margin of 1 when all values are equal
  ax.set_ylim([math.floor(low-margin), math.ceil(high+margin)])

  ax.set_ylabel('Accuracy in %')
  ax.set_xlabel('Word_embedding_type')
  ax.set_xticks(ind+(width/2))
  ax.set_xticklabels(labels)
  ax.set_title('Accuracy of ' +model_name+ ' by different features')
  ax.legend()

  def autolabel(rects):
      """Write each bar's height (at most two decimals) just above the bar."""
      for rect in rects:
          height = rect.get_height()
          ax.annotate('{:g}%'.format(round(height, 2)),
                      xy=(rect.get_x() + rect.get_width() / 2, height),
                      xytext=(0, 3),  # 3 points vertical offset
                      textcoords="offset points",
                      ha='center', va='bottom')

  autolabel(rects1)
  autolabel(rects2)
  plt.show()

#LSTM

In [10]:
#LSTM training, classification and evaluation
def _evaluate_lstm(embedding_weights, embedding_name, bidirectional):
  """Train a two-layer LSTM classifier on one frozen embedding and return its test accuracy.

  embedding_weights is used as the fixed (non-trainable) weights of the Embedding layer,
  embedding_name only appears in the printed report, and bidirectional selects whether
  both LSTM layers are wrapped in tf.keras.layers.Bidirectional.
  The accuracy returned is the value of sklearn's accuracy_score on the test set.
  """
  def recurrent_layer(return_sequences=False):
    layer = tf.keras.layers.LSTM(64, dropout=0.1, recurrent_dropout=0.1,return_sequences=return_sequences)
    return tf.keras.layers.Bidirectional(layer) if bidirectional else layer

  model = tf.keras.Sequential([
      tf.keras.layers.Embedding(vocab_size,embedding_weights.shape[1],weights=[embedding_weights],input_length=max_length,trainable=False),
      recurrent_layer(return_sequences=True),  # first layer passes the full sequence on to the second
      recurrent_layer(),
      tf.keras.layers.Dense(64 , activation='relu' ),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Dense(1,activation='sigmoid'),
  ])
  model.compile(loss='binary_crossentropy',optimizer='Adam',metrics=['accuracy'])
  model.fit(training_padded, train_level, epochs=4, validation_data=(valid_padded, valid_level),batch_size=32)

  #Turn the sigmoid outputs into class labels: below 0.5 -> 0, otherwise 1
  probabilities = model.predict(testing_padded)
  LSTM_prediction = np.where(probabilities[:, 0] < 0.5, 0, 1)

  Accuracy = accuracy_score(test_level, LSTM_prediction)
  direction = 'Bi' if bidirectional else 'single'
  print("when ",embedding_name,'and '+direction+' directional LSTM',",\nAccuracy : ",round(Accuracy,4),'\nConfusion Matrix : \n',confusion_matrix(test_level, LSTM_prediction),'\nClassification Report : \n',classification_report(test_level, LSTM_prediction))
  return Accuracy

#Names of the embeddings that actually have a loaded matrix. 'trainable_word2vec' is skipped
#when the matrices are loaded, so indexing middle_path with the position of a matrix would
#report the wrong embedding. Hyphens/line breaks are normalised in case the list was relabelled.
embedding_names = [name.replace("_", "-").replace("\n", "") for name in middle_path]
embedding_names = [name for name in embedding_names if name != 'trainable-word2vec']
if len(embedding_names) != len(embedding_matrix):
  raise ValueError('expected one embedding name per loaded embedding matrix')

single_direction_accuracy = []
Bi_direction_accuracy = []
for embedding_weights, embedding_name in zip(embedding_matrix, embedding_names):
  single_direction_accuracy.append(_evaluate_lstm(embedding_weights, embedding_name, bidirectional=False))
  Bi_direction_accuracy.append(_evaluate_lstm(embedding_weights, embedding_name, bidirectional=True))
  

In [None]:
#LSTM graph
#Only embeddings with a loaded matrix have accuracies, so 'trainable_word2vec' is left out of
#the labels (otherwise there is one label more than there are bars). A new list is passed so
#middle_path itself is never altered by the plotting code.
lstm_plot_labels = [name for name in middle_path if name.replace("-", "_") != 'trainable_word2vec']
#accuracy_score returns fractions in [0, 1]; the chart is labelled in %, so convert first
plot_graph('LSTM',
           [round(100*accuracy, 2) for accuracy in single_direction_accuracy],
           [round(100*accuracy, 2) for accuracy in Bi_direction_accuracy],
           lstm_plot_labels)

#GRU

In [None]:
#GRU training, classification and evaluation
def _evaluate_gru(embedding_weights, embedding_name, bidirectional):
  """Train a two-layer GRU classifier on one frozen embedding and return its test accuracy.

  embedding_weights is used as the fixed (non-trainable) weights of the Embedding layer,
  embedding_name only appears in the printed report, and bidirectional selects whether
  both GRU layers are wrapped in tf.keras.layers.Bidirectional.
  The accuracy returned is the value of sklearn's accuracy_score on the test set.
  """
  def recurrent_layer(return_sequences=False):
    layer = tf.keras.layers.GRU(64, dropout=0.1, recurrent_dropout=0.1,return_sequences=return_sequences)
    return tf.keras.layers.Bidirectional(layer) if bidirectional else layer

  model = tf.keras.Sequential([
      tf.keras.layers.Embedding(vocab_size,embedding_weights.shape[1],weights=[embedding_weights],input_length=max_length,trainable=False),
      recurrent_layer(return_sequences=True),  # first layer passes the full sequence on to the second
      recurrent_layer(),
      tf.keras.layers.Dense(64 , activation='relu' ),
      tf.keras.layers.Dropout(0.1),
      tf.keras.layers.Dense(1,activation='sigmoid'),
  ])
  model.compile(loss='binary_crossentropy',optimizer='Adam',metrics=['accuracy'])
  model.fit(training_padded, train_level, epochs=4, validation_data=(valid_padded, valid_level),batch_size=32)

  #Turn the sigmoid outputs into class labels: below 0.5 -> 0, otherwise 1
  probabilities = model.predict(testing_padded)
  GRU_prediction = np.where(probabilities[:, 0] < 0.5, 0, 1)

  Accuracy = accuracy_score(test_level, GRU_prediction)
  #The bi-directional report used to be printed as 'single directional GRU'; label it correctly
  direction = 'Bi' if bidirectional else 'single'
  print("when ",embedding_name,'and '+direction+' directional GRU',",\nAccuracy : ",round(Accuracy,4),'\nConfusion Matrix : \n',confusion_matrix(test_level, GRU_prediction),'\nClassification Report : \n',classification_report(test_level, GRU_prediction))
  return Accuracy

#Names of the embeddings that actually have a loaded matrix. 'trainable_word2vec' is skipped
#when the matrices are loaded, so indexing middle_path with the position of a matrix would
#report the wrong embedding. Hyphens/line breaks are normalised in case the list was relabelled.
embedding_names = [name.replace("_", "-").replace("\n", "") for name in middle_path]
embedding_names = [name for name in embedding_names if name != 'trainable-word2vec']
if len(embedding_names) != len(embedding_matrix):
  raise ValueError('expected one embedding name per loaded embedding matrix')

single_direction_accuracy = []
Bi_direction_accuracy = []
for embedding_weights, embedding_name in zip(embedding_matrix, embedding_names):
  single_direction_accuracy.append(_evaluate_gru(embedding_weights, embedding_name, bidirectional=False))
  Bi_direction_accuracy.append(_evaluate_gru(embedding_weights, embedding_name, bidirectional=True))

In [None]:
#GRU graph
#Only embeddings with a loaded matrix have accuracies, so 'trainable_word2vec' is left out of
#the labels (otherwise there is one label more than there are bars). A new list is passed so
#middle_path itself is never altered by the plotting code.
gru_plot_labels = [name for name in middle_path if name.replace("-", "_") != 'trainable_word2vec']
#accuracy_score returns fractions in [0, 1]; the chart is labelled in %, so convert first
plot_graph('GRU',
           [round(100*accuracy, 2) for accuracy in single_direction_accuracy],
           [round(100*accuracy, 2) for accuracy in Bi_direction_accuracy],
           gru_plot_labels)