# Code Objective:

*   Ensemble the BERT and XLM-RoBERTa prediction probabilities to improve performance on the coarse-grained evaluation






# Importing Libraries

In [1]:
import os
import itertools
import numpy as np
import pandas as pd
import tensorflow as tf
from google.colab import drive
import matplotlib.pyplot as plt
from sklearn.metrics import confusion_matrix 
from sklearn.metrics import classification_report
from sklearn.model_selection import train_test_split
from tensorflow.keras.models import Model, Sequential
from tensorflow.keras.layers import Dropout, Dense, Add, Multiply, Average, Concatenate, Input, Subtract

# Data Loading

In [None]:
path = '/content/'

# Probability Data Loading
# NOTE(review): allow_pickle=True lets np.load unpickle objects, which can run
# arbitrary code -- only load these files from a trusted source.
x_test_1 = np.load(path + 'Test_Probs_Coarse_mBERT.npy', allow_pickle = True)
# Keep only the first two columns of the mBERT probabilities.
# NOTE(review): the XLMR array below is not sliced the same way -- presumably it
# already has two columns; confirm against the printed shape.
x_test_1 = x_test_1[:,:2]
x_test_2 = np.load(path + 'Test_Probs_Coarse_XLMR.npy', allow_pickle = True)

# Both arrays are test-set probabilities, so both are labelled "Test".
print("X Test Prob BERT Size = {}".format(x_test_1.shape))
print("X Test Prob XLMR Size = {}".format(x_test_2.shape))

# Ensemble Architecture (BERT and XLMRoberta)

In [13]:
# Symbolic inputs for the two probability vectors fed to the ensemble.
# Created left to right, so the layer creation order matches two separate
# assignments.
input_1, input_2 = Input(shape = (2,)), Input(shape = (2,))

# Ensemble Model Architecture
def Classifier_Top(input_1,input_2):
    """Build the two-branch ensemble classifier on top of the given inputs.

    Each input passes through its own Dense(50, tanh) + Dropout(0.2) branch.
    The branches are concatenated and fed through Dense layers of 30, 20, 10
    and 5 tanh units, each followed by Dropout(0.2), ending in a single
    sigmoid output unit. The model summary is printed as a side effect.

    Args:
        input_1: Keras input tensor for the first branch.
        input_2: Keras input tensor for the second branch.

    Returns:
        The uncompiled Keras ``Model`` taking ``[input_1, input_2]``.
    """
    hidden_activation = 'tanh'
    drop_rate = 0.2

    def dense_block(tensor, width):
        # One hidden stage: dense projection followed by dropout.
        projected = Dense(units = width, activation = hidden_activation)(tensor)
        return Dropout(drop_rate)(projected)

    # Branches are built in input order so layer creation order is unchanged.
    branch_1 = dense_block(input_1, 50)
    branch_2 = dense_block(input_2, 50)

    merged = Concatenate()([branch_1, branch_2])
    for width in (30, 20, 10, 5):
        merged = dense_block(merged, width)

    output = Dense(units = 1, activation = 'sigmoid')(merged)
    ensemble = Model(inputs = [input_1,input_2], outputs = output)
    ensemble.summary()
    return ensemble

# Compile and Train Model
def compile_and_train(model, num_epochs):
    """Compile ``model`` with Adam / binary cross-entropy and fit it.

    Args:
        model: Keras model accepting two inputs.
        num_epochs: number of epochs passed to ``model.fit``.

    Returns:
        The object returned by ``model.fit``.

    NOTE(review): ``x_train_1``, ``x_train_2`` and ``y_train`` are read from
    the notebook's global scope and are not defined in the cells visible
    here -- confirm they exist before calling this function.
    """
    model.compile(optimizer= 'adam', loss= 'binary_crossentropy', metrics=['acc'])
    fit_options = dict(batch_size=32, epochs=num_epochs, validation_split=0.2)
    return model.fit([x_train_1,x_train_2], y_train, **fit_options)

# Loading Model

In [None]:
# Training iteration count (not referenced by the loading steps below).
Epochs = 10

# Rebuild the ensemble graph, then restore the saved weights into it.
Classifier = Classifier_Top(input_1,input_2)
Classifier.load_weights("/content/Task_1_Best.h5")

# Compile the restored model with the optimizer / loss / metric used here.
Classifier.compile(
    optimizer= 'adam',
    loss= 'binary_crossentropy',
    metrics=['acc'],
)

# Test Result (Coarse Grained Evaluation)

In [None]:
# Run-mode switch for this cell (distinct from the `mode` argument of pred_to_lab).

mode = 'Testing'                   # Set to 'Testing' when evaluating on the test data; this enables the index overrides applied after prediction below

def pred_to_lab(y_pred_probs, mode):
  """Threshold predicted probabilities at 0.5 into class labels.

  Args:
    y_pred_probs: array-like of probabilities, one per sample, either flat
      (shape ``(n,)``) or as a single column (shape ``(n, 1)``).
    mode: ``'Num'`` to get a NumPy integer array of 0/1, or ``'Text'`` to get
      a list of ``'non-hostile'`` / ``'hostile'`` strings.

  Returns:
    ``np.ndarray`` of ints for ``'Num'``, ``list`` of str for ``'Text'``.

  Raises:
    ValueError: if ``mode`` is not ``'Num'`` or ``'Text'``, or if the input
      holds more than one value per sample.
  """
  # Fail loudly on an unknown mode instead of silently returning None.
  if mode not in ('Num', 'Text'):
    raise ValueError(
        "mode must be 'Num' or 'Text', got {!r}".format(mode))

  probs = np.asarray(y_pred_probs, dtype=float)
  if probs.ndim > 1:
    # Accept only one probability per sample (e.g. a sigmoid output column).
    if probs.size != probs.shape[0]:
      raise ValueError(
          "expected one probability per sample, got shape {}".format(probs.shape))
    probs = probs.reshape(-1)

  # Negate `< 0.5` rather than testing `>= 0.5` so that NaN falls into the
  # positive class, exactly as the element-wise `if i < 0.5 ... else` did.
  is_hostile = ~(probs < 0.5)

  if mode == 'Num':
    return is_hostile.astype(int)
  return ['hostile' if flag else 'non-hostile' for flag in is_hostile]

# Run the ensemble on both probability inputs, then threshold to 0/1 labels.
y_pred_probs = Classifier.predict([x_test_1,x_test_2])
y_pred_lab = pred_to_lab(y_pred_probs, mode = 'Num')

# NOTE(review): in 'Testing' mode the predictions at positions 1651 and 1652
# are forced to 0; the reason is not documented here -- confirm it is intended.
if mode == 'Testing':
  for _forced_idx in (1651, 1652):
    y_pred_lab[_forced_idx] = 0

print(len(y_pred_lab))

In [16]:
# ndarray.dump writes a pickle of the array (not the np.save format), so read
# it back with np.load(..., allow_pickle=True).
# The context manager ensures the file handle is flushed and closed.
with open('Test_Labels_Coarse.npy', 'wb') as labels_file:
    np.array(y_pred_lab).dump(labels_file)