In [None]:
!pip install pretty-confusion-matrix
!pip install transformers
!pip install pytorch-transformers
!pip install -U sentence-transformers

In [None]:
import numpy as np 
import pandas as pd 
import csv
import pickle
import matplotlib.pyplot as plt

In [None]:
import tensorflow as tf
from keras.layers import Bidirectional, Input, Dense, Layer, Dropout, LSTM, RepeatVector, Concatenate, Flatten
from keras.models import Sequential, Model
from tensorflow.python.keras.callbacks import EarlyStopping
from keras import backend as K

In [None]:
# Record the TensorFlow version this run was executed with.
print(tf.__version__)

In [None]:
### HYPERPARAMETERS
# Number of constructiveness classes: size of the softmax output and number of
# Attention layers created in the model cell.
CONSTRUCTIVENESS_LEVELS = 5
# Not referenced by any cell visible in this notebook.
POLITENESS_LEVELS = 3
# Number of training epochs passed to model.fit.
EPOCHS = 20
# Feature size of the model input (last axis) and base width of the dense layers.
MAXLEN = 768  # Since EMBEDDING LAYER returns 768 embeddings vector
# Units of the LSTM wrapped by Bidirectional.
LSTM_UNITS = 256
# Not referenced by any cell visible in this notebook.
VOCAB_LEN = 1853
EMBEDDING_DIMENSION = 300

# FLAG
# True  -> pre-computed embeddings are fed to the BiLSTM model as loaded.
# False -> tokenizer/encoder are loaded and one axis is stripped from the embeddings.
is_BiLSTM = True # Flag to automate other pre-processing for With or Without BiLSTM variants

# LABELS
# Column names selected from the y_* label frames; list order defines the class index.
constructive_labels = ['HNC', 'NC', 'NN', 'C', 'HC']
politeness_labels = ['IMP', 'NP', 'P']

## ADD PATH OF THE PRE-TRAINED MODEL HERE ##
# The model is written to and read back from this path.
SAVE_PATH = "./Baselines-Single-Task-Model.h5" 

## EMBEDDING LAYER

In [None]:
## SCI-BERT 
# Filename prefix of the pickled embedding splits read by loadPickle.
name = 'SCIBERT'
# Identifier handed to AutoTokenizer / AutoModel.from_pretrained when is_BiLSTM is False.
embed_model_name = 'gsarti/scibert-nli'

# LOAD DATASET

In [None]:
# LOAD EMBEDS DATASET

def loadPickle(name, base_path=''):
    """Load the pickled train/test/val embeddings and their label CSV files.

    Parameters
    ----------
    name : str
        Filename prefix of the pickles; ``<name>_train.pickle``,
        ``<name>_test.pickle`` and ``<name>_val.pickle`` are read.
    base_path : str, optional
        Prefix prepended verbatim to every filename (plain string
        concatenation, so a directory must end with a path separator),
        e.g. ``'../iitpolitenesslevels-DATASET/'``. Defaults to ``''``,
        i.e. the current working directory.

    Returns
    -------
    tuple
        ``(train_embeds, test_embeds, val_embeds, y_train, y_test, y_val)``;
        the embeddings are whatever objects were pickled (they must expose
        ``.shape``), the labels are DataFrames read from ``y_train.csv``,
        ``y_test.csv`` and ``y_val.csv``.

    Raises
    ------
    FileNotFoundError
        If any of the six files is missing.
    """
    # SECURITY: pickle.load can execute arbitrary code embedded in the file.
    # Only point this at embedding files you created or fully trust.
    embeds = {}
    for split in ('train', 'test', 'val'):
        # The context manager closes the file; no explicit close() is needed.
        with open(base_path + name + '_' + split + '.pickle', 'rb') as handle:
            embeds[split] = pickle.load(handle)

    train_embeds, test_embeds, val_embeds = embeds['train'], embeds['test'], embeds['val']

    y_train = pd.read_csv(base_path + 'y_train.csv')
    y_val = pd.read_csv(base_path + 'y_val.csv')
    y_test = pd.read_csv(base_path + 'y_test.csv')

    print('\n***** LOADED '+ name+' *****\n')
    print(f'TRAIN SHAPE : {train_embeds.shape}\nTEST SHAPE : {test_embeds.shape}\nVAL SHAPE : {val_embeds.shape}\nY-TRAIN SHAPE : {y_train.shape}\nY-TEST SHAPE : {y_test.shape}\nY-VAL SHAPE : {y_val.shape}')

    return train_embeds, test_embeds, val_embeds, y_train, y_test, y_val

In [None]:
# Load the embedding splits selected by `name` together with their label frames.
train_embeds, test_embeds, val_embeds, y_train, y_test, y_val = loadPickle(name)

In [None]:
# Preview the first rows of the training label frame.
y_train.head()

### LOAD EMBEDDING MODEL

In [None]:
from transformers import pipeline, AutoTokenizer, AutoModel

In [None]:
# The tokenizer and encoder are only loaded for the variant without BiLSTM.
# NOTE: when is_BiLSTM is True, `tokenizer` and `embed_model` stay undefined, so
# sent2embed (which reads the global `tokenizer`) cannot be called in that mode.
if not is_BiLSTM:
    tokenizer = AutoTokenizer.from_pretrained(embed_model_name) 
    embed_model = AutoModel.from_pretrained(embed_model_name)

In [None]:
def sent2embed(model, sent):
    """Tokenize `sent` and run the encoded batch through `model`.

    Relies on the module-level `tokenizer`, which this notebook only creates
    when ``is_BiLSTM`` is False.

    Parameters
    ----------
    model : callable
        Invoked as ``model(**inputs)`` with the tokenizer's output.
    sent : str or sequence of str
        Text handed to the tokenizer (the sample cell passes a one-element
        list). It is padded, truncated to at most 512 tokens and requested
        as PyTorch tensors (``return_tensors="pt"``).

    Returns
    -------
    object
        The model output, unmodified. Callers in this notebook read element
        ``[1]`` of it -- presumably the pooled sentence vector; confirm
        against the encoder's documentation.
    """
    inputs = tokenizer(sent, return_tensors="pt", padding=True, truncation=True, max_length=512)
    # The previous unused `last_hidden_states` local was dropped; callers
    # receive the full output object exactly as before.
    return model(**inputs)

# SAMPLE
# Smoke-test the encoder on one sentence (only for the variant without BiLSTM,
# the only mode in which `embed_model` exists).
if not is_BiLSTM:
    sent = ['This work studies the predictive uncertainty issue of deep learning models.']
    outputs = sent2embed(embed_model, sent)
    # A bare expression nested inside an `if` block is not echoed by the
    # notebook, so print the shape explicitly.
    print(outputs[1].shape)


In [None]:
# PRE-PROCESS
def Embed(reviews, y_train, model):
    """Embed every review with `model` and stack the results into one tensor.

    Parameters
    ----------
    reviews : pandas.Series
        Review texts. Items are read by position, so the index does not have
        to be a gap-free integer range (e.g. after filtering or shuffling).
    y_train : object
        Labels belonging to `reviews`; returned unchanged.
    model : callable
        Encoder forwarded to ``sent2embed``.

    Returns
    -------
    tuple
        ``(embed_train_arr, y_train)`` where ``embed_train_arr`` is the
        ``tf.stack`` of one ``[vector]`` entry per review, each vector being
        row 0 of element ``[1]`` of the encoder output.
    """
    embed_arr = []
    for position in range(len(reviews.index)):
        # Progress trace: index label of the review being embedded.
        print(reviews.index[position])
        # Positional lookup; the former `.loc[start + idx]` raised KeyError as
        # soon as the index had gaps or was not integer based.
        outputs = sent2embed(model, reviews.iloc[position])
        # Wrap the vector in a list so each stacked item keeps a leading axis of size 1.
        embed_arr.append([outputs[1].detach().numpy()[0]])

    embed_train_arr = tf.stack(embed_arr)

    return embed_train_arr, y_train

# MODEL

### ATTENTION LAYER

In [None]:
class Attention(Layer):
    """Attention pooling over axis 1 of a 3-D input.

    For an input ``x`` the layer computes ``e = tanh(x . W + b)``, squeezes the
    trailing unit axis, applies a softmax over the last axis of ``e``, weights
    ``x`` by the result and returns the sum over axis 1.

    Parameters
    ----------
    return_sequences : bool, default True
        Stored on the instance and serialised by ``get_config``.
        NOTE(review): ``call`` never reads this flag -- the layer always
        returns the pooled sum. Confirm whether returning the weighted
        sequence was intended before relying on it.
    **kwargs
        Forwarded to ``keras.layers.Layer`` (e.g. ``name``, ``dtype``).
    """

    def __init__(self, return_sequences=True, **kwargs):
        # Initialise the base Layer exactly once and forward the kwargs; the
        # previous code ran __init__ twice, first without the kwargs.
        super(Attention, self).__init__(**kwargs)
        self.return_sequences = return_sequences

    def get_config(self):
        """Return the base layer config extended with ``return_sequences``."""
        config = super(Attention, self).get_config().copy()
        config.update({
            'return_sequences': self.return_sequences,
        })
        return config

    def build(self, input_shape):
        """Create the weights: W is (input_shape[-1], 1), b is (input_shape[1], 1).

        Because the bias is sized by ``input_shape[1]``, that axis must have a
        known length when the layer is built.
        """
        self.W = self.add_weight(name="att_weight", shape=(input_shape[-1], 1),
                                 initializer="normal")
        self.b = self.add_weight(name="att_bias", shape=(input_shape[1], 1),
                                 initializer="zeros")

        super(Attention, self).build(input_shape)

    def call(self, x):
        """Return ``sum(x * softmax(tanh(x . W + b)), axis=1)``."""
        # One score per position along axis 1.
        e = K.squeeze(K.tanh(K.dot(x, self.W) + self.b), axis=-1)
        # Normalise the scores and restore the trailing axis for broadcasting.
        a = K.softmax(e)
        a = K.expand_dims(a, axis=-1)
        output = x * a

        return K.sum(output, axis=1)

In [None]:
if not is_BiLSTM:
    ## ONLY IF BiLSTM NOT there -- REDUCE DIMENSION OF DATASET BY 1DIM 
    # Keeps element 0 of every row, which removes one axis from each split.
    # NOTE: the same names are rebound, so this cell is not idempotent --
    # re-running it would strip a further axis.
    train_embeds = np.array([row[0] for row in train_embeds ])
    test_embeds = np.array([row[0] for row in test_embeds ])
    val_embeds = np.array([row[0] for row in val_embeds ])


## SINGLE TASK MODEL

In [None]:
# Input of shape (batch, timesteps, MAXLEN); the timesteps axis is left unspecified.
embed = Input(shape = (None, MAXLEN), name='INPUT') # with BiLSTM

# return_sequences=False makes the BiLSTM emit one vector per sample;
# RepeatVector(1) turns that vector back into a sequence of length 1.
biLSTM = Bidirectional(LSTM(LSTM_UNITS , return_sequences=False) , name='BILSTM' )(embed)
repeat_op = RepeatVector(1)(biLSTM)

# __________________ #

## ATTENTION 
# One Attention layer per constructiveness level, all fed the same tensor.
# NOTE(review): repeat_op has a single timestep, so the softmax inside Attention
# runs over one element (weight 1.0) and each layer returns its input unchanged;
# the concatenation therefore holds CONSTRUCTIVENESS_LEVELS identical copies.
# If attention over positions was intended, the LSTM would need to return
# sequences -- confirm with the authors before changing the architecture.
attentions=[]
for i in range(CONSTRUCTIVENESS_LEVELS):
  x = Attention(return_sequences=False)(repeat_op)
  attentions.append(x)

concate_attention  = Concatenate()(attentions)
flatten = Flatten()(concate_attention)

# __________________ #

## FFNN
# Two ReLU dense layers (2*MAXLEN then MAXLEN units), each followed by 20% dropout,
# ending in a softmax over the constructiveness classes.
b1_x =  Dense(2*MAXLEN, activation='relu', name='CONSTRUCTIVENESS_DENSE')(flatten)
b1_x = Dropout(0.2)(b1_x)
b1_x =  Dense(MAXLEN, activation='relu', name='CONSTRUCTIVENESS_DENSE_1')(b1_x)
b1_x = Dropout(0.2)(b1_x)
category_output = Dense(CONSTRUCTIVENESS_LEVELS, activation='softmax', name='CONSTRUCTIVENESS_OUTPUT')(b1_x)


model = Model(inputs=embed, outputs= category_output)

## COMPILE MODEL

In [None]:
# Categorical cross-entropy for the softmax output; the label columns are
# presumably one-hot encoded (later cells take their argmax) -- confirm.
# NOTE: the name `loss` is rebound to the per-epoch loss history further down.
loss ='categorical_crossentropy' 

model.compile(loss= loss,
                optimizer='adam', 
                metrics=['accuracy']) 


# Print the layer-by-layer summary of the compiled model.
model.summary()


In [None]:
# Render the architecture diagram (with tensor shapes) to single-task_model.png.
tf.keras.utils.plot_model(model, to_file='single-task_model.png', show_shapes=True)

## TRAIN MODEL

In [None]:
# INCLUDE THIS CODE IF KAGGLE IS THROWING RUN-TIME ERROR

# Ask TensorFlow to grow GPU memory on demand instead of reserving it up front.
physical_devices = tf.config.list_physical_devices('GPU') 
for device in physical_devices:
    try:
        tf.config.experimental.set_memory_growth(device, True)
    except RuntimeError as err:
        # Memory growth has to be configured before the GPU is initialised.
        # This cell runs after the model was built, so TensorFlow may refuse;
        # report that instead of aborting the notebook run.
        print(f'Could not enable memory growth for {device}: {err}')

In [None]:
# SINGLE-TASK MODEL

# Monitor training on the dedicated validation split. The test split was used
# here before, so the "validation" curves were really test metrics; it is now
# left untouched until the final evaluation.
history = model.fit(train_embeds, 
                    y = y_train[constructive_labels].values,  
                    validation_data = (val_embeds, y_val[constructive_labels].values),
                   epochs = EPOCHS,
                   shuffle=True,
                   verbose=2
                )


In [None]:
# SAVE MODEL
# SAVE_PATH ends in ".h5", so let Keras infer the HDF5 format from the file
# extension. The former explicit save_format="tf" contradicted the filename and
# is rejected by newer Keras releases.
# Note: this stores the complete model (architecture + weights), not only weights.
model.save(SAVE_PATH, overwrite=True)
print("Weights Saved")

In [None]:
# LOAD MODEL
from keras.models import load_model
# Read the model back from the path it was just saved to.
LOAD_PATH = SAVE_PATH
# custom_objects tells Keras which class to use for the serialised 'Attention' layer.
# NOTE: this rebinds `model` to the reloaded instance.
model = load_model(LOAD_PATH, custom_objects={'Attention': Attention})

print("MODEL LOADED\n\n")

# SINGLE-TASK MODEL
# Evaluate the reloaded model on the test split (constructiveness labels).
model.evaluate(test_embeds,y_test[constructive_labels].values)

## ANALYSIS

In [None]:
# List the metric names recorded by model.fit.
history.history.keys()

## VALIDATION

In [None]:
metrics_keys = list(history.history.keys())

# Look the curves up by name rather than by position in the dict, so a
# different key order can never silently swap loss and accuracy.
# Older Keras releases log accuracy as 'acc' / 'val_acc'.
acc_key = 'accuracy' if 'accuracy' in history.history else 'acc'

loss = history.history['loss']
acc = history.history[acc_key]
val_loss = history.history['val_loss']
val_accuracy = history.history['val_' + acc_key]


print("\nACCURACY : " ,acc)
print("\nVAL ACCURACY : " ,val_accuracy)
print("\nLOSS : " , loss)
print("\nVAL LOSS : " , val_loss)

In [None]:
# One x position per recorded epoch; tick every second epoch.
metrics = history.history['loss']
epochs_range = range(1, len(metrics) + 1) 
xticks = range(1, len(metrics) + 1, 2)

plt.figure(figsize=(15, 30))

# Train vs Val Loss
# (The model is trained on the constructiveness labels, hence the title.)
plt.subplot(2, 1, 1)
plt.plot(epochs_range, loss, label='Training Loss')
plt.plot(epochs_range, val_loss, label='Validation Loss')
plt.title('Constructiveness Training vs Validation Loss', fontsize=20)
plt.xticks(xticks)
plt.xlabel('Epochs', fontsize=19, labelpad=20)
plt.ylabel('Loss', fontsize=19, labelpad=20)

plt.tick_params(axis='both', which='major', labelsize=15)
plt.legend(fontsize=17, loc='upper right')


# Train vs Val Accuracy
plt.subplot(2, 1, 2)
plt.plot(epochs_range, acc, label='Training Accuracy')
plt.plot(epochs_range, val_accuracy, label='Validation Accuracy')
plt.title('Constructiveness Training vs Validation Accuracy', fontsize=20)
plt.xlabel('Epochs', fontsize=19, labelpad=20)
# This panel shows accuracy; the axis was previously mislabelled 'Loss'.
plt.ylabel('Accuracy', fontsize=19, labelpad=20)
plt.xticks(xticks)
plt.tick_params(axis='both', which='major', labelsize=15)
plt.legend(fontsize=17, loc='lower right')


plt.show()

In [None]:
# CONSTRUCTIVENESS PREDICTIONS
# For each split: predict class scores, take the argmax as the predicted class
# index, and take the argmax of the label columns as the true class index.

## TEST
y_pred_test = model.predict(test_embeds)
y_pred_test_cons_idx = np.argmax(y_pred_test, axis=1)
y_test_true_cons_idx =np.argmax(y_test[constructive_labels].values, axis=1)


## VAL
y_pred_val = model.predict(val_embeds)
y_pred_val_cons_idx = np.argmax(y_pred_val, axis=1)
y_val_true_cons_idx =np.argmax(y_val[constructive_labels].values, axis=1)

In [None]:
# CONFUSION MATRIX
import seaborn as sns
from sklearn.metrics import confusion_matrix as cm

# Pin the class ids explicitly. Without `labels`, scikit-learn only keeps the
# classes that occur in y_true or y_pred, so a class missing from a split would
# shrink the matrix and shift every row/column against `constructive_labels`.
cons_class_ids = list(range(len(constructive_labels)))

cm_cons_test = cm(y_test_true_cons_idx, y_pred_test_cons_idx, labels=cons_class_ids)
cm_cons_val = cm(y_val_true_cons_idx, y_pred_val_cons_idx, labels=cons_class_ids)


In [None]:
# CLASS-WISE ACCURACY

## CONSTRUCTIVENESS
# Row-normalise each confusion matrix and keep the diagonal: the share of
# samples of every true class that was predicted correctly. A class without
# any true sample in a split has a zero row sum and yields nan.
cm_cons_test_acc = cm_cons_test.astype('float') / cm_cons_test.sum(axis=1)[:, np.newaxis]
cm_cons_test_acc =cm_cons_test_acc.diagonal()

cm_cons_val_acc = cm_cons_val.astype('float') / cm_cons_val.sum(axis=1)[:, np.newaxis]
cm_cons_val_acc =cm_cons_val_acc.diagonal()


# ------------------------ #

# TEST
# The loop variable is named `class_acc` so it does not overwrite `acc`, the
# training-accuracy history that the plotting cell reads.
print('*** CLASS-WISE  TEST ACCURACY : **** \n')
print("CLASS\tACCURACY")
for cat, class_acc in zip(constructive_labels, cm_cons_test_acc):
    print('',cat,"\t",round(class_acc, 3)) 

# ------------------------ #

# VAL    

print('\n\n*** CLASS-WISE  VAL ACCURACY : **** \n')
print("\nCLASS\tACCURACY")
for cat, class_acc in zip(constructive_labels, cm_cons_val_acc):
    print('',cat,"\t",round(class_acc, 3))
    

In [None]:
def plotHeatmap(cm, branch, data_type, labels):
    """Draw an annotated confusion-matrix heatmap and show it.

    cm        -- confusion matrix of integer counts (cells are formatted with 'd').
    branch    -- task name used at the start of the title.
    data_type -- split name inserted into the title.
    labels    -- tick labels used for both axes.
    """
    plt.figure(figsize=(25, 10))
    sns.heatmap(
        cm,
        annot=True,
        fmt='d',
        cmap='Blues',
        cbar=True,
        square=True,
        xticklabels=labels,
        yticklabels=labels,
    )
    # Columns are predictions, rows are ground truth.
    for set_axis_label, text in ((plt.xlabel, 'Predicted Values'), (plt.ylabel, 'Actual Values')):
        set_axis_label(text, fontsize=20, labelpad=20)
    plt.title(branch + ' Confusion Matrix for ' + data_type + ' Data', fontsize=20)
    plt.tick_params(axis='both', which='major', labelsize=15)
    plt.show()

In [None]:
# Confusion-matrix heatmap for the test split.
plotHeatmap(cm_cons_test, 'Constructiveness', 'Testing', constructive_labels)

In [None]:
# Confusion-matrix heatmap for the validation split.
plotHeatmap(cm_cons_val, 'Constructiveness', 'Validation', constructive_labels)

--------------

In [None]:
from pretty_confusion_matrix import pp_matrix

def customCM(cm, labels):
    """Render `cm` with pretty_confusion_matrix, labelling rows and columns with `labels`."""
    labelled_cm = pd.DataFrame(cm, index=labels, columns=labels)
    pp_matrix(labelled_cm, cmap='PuRd')

In [None]:
# Pretty-printed confusion matrix for the test split.
customCM(cm_cons_test, constructive_labels)

In [None]:
# Pretty-printed confusion matrix for the validation split.
customCM(cm_cons_val, constructive_labels)

In [None]:
# CLASSIFICATION REPORT
from sklearn.metrics import classification_report

def class_report(y_test_true_idx, y_pred_test_idx,y_val_true_idx, y_pred_val_idx , labels):
    """Print scikit-learn classification reports for the test and validation splits.

    Parameters
    ----------
    y_test_true_idx, y_pred_test_idx : array-like of int
        True and predicted class indices for the test split.
    y_val_true_idx, y_pred_val_idx : array-like of int
        True and predicted class indices for the validation split.
    labels : sequence of str
        Class names; position ``i`` names class index ``i``.
    """
    # Pass the class ids explicitly: with `target_names` alone,
    # classification_report raises when a class is absent from a split because
    # the number of observed classes no longer matches the number of names.
    class_ids = list(range(len(labels)))
    print('\n\n**** TESTING ****\n\n', classification_report(y_test_true_idx, y_pred_test_idx, labels=class_ids, target_names=labels))
    print('\n\n**** VALIDATION ****\n\n',classification_report(y_val_true_idx, y_pred_val_idx , labels=class_ids, target_names=labels))

In [None]:
# Precision / recall / F1 per constructiveness class for both evaluation splits.
class_report(y_test_true_cons_idx, y_pred_test_cons_idx, y_val_true_cons_idx, y_pred_val_cons_idx , constructive_labels)